1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s 4; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s 5; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s 6; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s 7; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck --check-prefixes=GFX90A-NOTTGSPLIT %s 8; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck --check-prefixes=GFX90A-TGSPLIT %s 9; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck --check-prefixes=GFX940-NOTTGSPLIT %s 10; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck --check-prefixes=GFX940-TGSPLIT %s 11; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s 12; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s 13; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s 14; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s 15 16define amdgpu_kernel void @global_system_unordered_load( 17; GFX6-LABEL: global_system_unordered_load: 18; GFX6: ; %bb.0: ; %entry 19; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 20; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 21; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 22; GFX6-NEXT: s_waitcnt lgkmcnt(0) 23; GFX6-NEXT: s_mov_b32 s6, s9 24; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 25; GFX6-NEXT: s_mov_b32 s12, 
0x100f000 26; GFX6-NEXT: s_mov_b32 s13, -1 27; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 28; GFX6-NEXT: s_mov_b32 s9, s6 29; GFX6-NEXT: s_mov_b32 s10, s13 30; GFX6-NEXT: s_mov_b32 s11, s12 31; GFX6-NEXT: s_mov_b32 s14, s5 32; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 33; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 34; GFX6-NEXT: s_mov_b32 s5, s14 35; GFX6-NEXT: s_mov_b32 s6, s13 36; GFX6-NEXT: s_mov_b32 s7, s12 37; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 38; GFX6-NEXT: s_waitcnt vmcnt(0) 39; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 40; GFX6-NEXT: s_endpgm 41; 42; GFX7-LABEL: global_system_unordered_load: 43; GFX7: ; %bb.0: ; %entry 44; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 45; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 46; GFX7-NEXT: s_waitcnt lgkmcnt(0) 47; GFX7-NEXT: v_mov_b32_e32 v0, s6 48; GFX7-NEXT: v_mov_b32_e32 v1, s7 49; GFX7-NEXT: flat_load_dword v2, v[0:1] 50; GFX7-NEXT: v_mov_b32_e32 v0, s4 51; GFX7-NEXT: v_mov_b32_e32 v1, s5 52; GFX7-NEXT: s_waitcnt vmcnt(0) 53; GFX7-NEXT: flat_store_dword v[0:1], v2 54; GFX7-NEXT: s_endpgm 55; 56; GFX10-WGP-LABEL: global_system_unordered_load: 57; GFX10-WGP: ; %bb.0: ; %entry 58; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 59; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 60; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 61; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 62; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] 63; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 64; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 65; GFX10-WGP-NEXT: s_endpgm 66; 67; GFX10-CU-LABEL: global_system_unordered_load: 68; GFX10-CU: ; %bb.0: ; %entry 69; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 70; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 71; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 72; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 73; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] 74; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 75; GFX10-CU-NEXT: 
global_store_dword v0, v1, s[4:5] 76; GFX10-CU-NEXT: s_endpgm 77; 78; SKIP-CACHE-INV-LABEL: global_system_unordered_load: 79; SKIP-CACHE-INV: ; %bb.0: ; %entry 80; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 81; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 82; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 83; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 84; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 85; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 86; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 87; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 88; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 89; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 90; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 91; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 92; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 93; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 94; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 95; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 96; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 97; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 98; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 99; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 100; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 101; SKIP-CACHE-INV-NEXT: s_endpgm 102; 103; GFX90A-NOTTGSPLIT-LABEL: global_system_unordered_load: 104; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 105; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 106; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 107; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 108; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 109; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 110; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 111; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 112; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 113; 114; GFX90A-TGSPLIT-LABEL: global_system_unordered_load: 115; GFX90A-TGSPLIT: ; %bb.0: ; %entry 116; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 117; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 118; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 119; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 120; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 121; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 122; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 123; GFX90A-TGSPLIT-NEXT: s_endpgm 124; 125; GFX940-NOTTGSPLIT-LABEL: global_system_unordered_load: 126; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 127; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 128; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 129; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 130; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 131; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 132; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 133; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 134; GFX940-NOTTGSPLIT-NEXT: s_endpgm 135; 136; GFX940-TGSPLIT-LABEL: global_system_unordered_load: 137; GFX940-TGSPLIT: ; %bb.0: ; %entry 138; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 139; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 140; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 141; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 142; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 143; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 144; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 145; GFX940-TGSPLIT-NEXT: s_endpgm 146; 147; GFX11-WGP-LABEL: global_system_unordered_load: 148; GFX11-WGP: ; %bb.0: ; %entry 149; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 150; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 151; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 152; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 153; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 154; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 155; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 156; GFX11-WGP-NEXT: s_endpgm 157; 158; GFX11-CU-LABEL: global_system_unordered_load: 159; 
GFX11-CU: ; %bb.0: ; %entry 160; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 161; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 162; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 163; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 164; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] 165; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 166; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 167; GFX11-CU-NEXT: s_endpgm 168; 169; GFX12-WGP-LABEL: global_system_unordered_load: 170; GFX12-WGP: ; %bb.0: ; %entry 171; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 172; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 173; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 174; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 175; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 176; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 177; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 178; GFX12-WGP-NEXT: s_endpgm 179; 180; GFX12-CU-LABEL: global_system_unordered_load: 181; GFX12-CU: ; %bb.0: ; %entry 182; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 183; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 184; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 185; GFX12-CU-NEXT: s_wait_kmcnt 0x0 186; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] 187; GFX12-CU-NEXT: s_wait_loadcnt 0x0 188; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 189; GFX12-CU-NEXT: s_endpgm 190 ptr addrspace(1) %in, ptr addrspace(1) %out) { 191entry: 192 %val = load atomic i32, ptr addrspace(1) %in unordered, align 4 193 store i32 %val, ptr addrspace(1) %out 194 ret void 195} 196 197define amdgpu_kernel void @global_system_monotonic_load( 198; GFX6-LABEL: global_system_monotonic_load: 199; GFX6: ; %bb.0: ; %entry 200; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 201; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 202; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 203; GFX6-NEXT: s_waitcnt lgkmcnt(0) 204; GFX6-NEXT: s_mov_b32 s6, s9 205; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 206; GFX6-NEXT: s_mov_b32 s12, 0x100f000 207; GFX6-NEXT: s_mov_b32 s13, -1 208; GFX6-NEXT: ; kill: def $sgpr8 killed 
$sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 209; GFX6-NEXT: s_mov_b32 s9, s6 210; GFX6-NEXT: s_mov_b32 s10, s13 211; GFX6-NEXT: s_mov_b32 s11, s12 212; GFX6-NEXT: s_mov_b32 s14, s5 213; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 214; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 215; GFX6-NEXT: s_mov_b32 s5, s14 216; GFX6-NEXT: s_mov_b32 s6, s13 217; GFX6-NEXT: s_mov_b32 s7, s12 218; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 glc 219; GFX6-NEXT: s_waitcnt vmcnt(0) 220; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 221; GFX6-NEXT: s_endpgm 222; 223; GFX7-LABEL: global_system_monotonic_load: 224; GFX7: ; %bb.0: ; %entry 225; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 226; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 227; GFX7-NEXT: s_waitcnt lgkmcnt(0) 228; GFX7-NEXT: v_mov_b32_e32 v0, s6 229; GFX7-NEXT: v_mov_b32_e32 v1, s7 230; GFX7-NEXT: flat_load_dword v2, v[0:1] glc 231; GFX7-NEXT: v_mov_b32_e32 v0, s4 232; GFX7-NEXT: v_mov_b32_e32 v1, s5 233; GFX7-NEXT: s_waitcnt vmcnt(0) 234; GFX7-NEXT: flat_store_dword v[0:1], v2 235; GFX7-NEXT: s_endpgm 236; 237; GFX10-WGP-LABEL: global_system_monotonic_load: 238; GFX10-WGP: ; %bb.0: ; %entry 239; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 240; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 241; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 242; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 243; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 244; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 245; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 246; GFX10-WGP-NEXT: s_endpgm 247; 248; GFX10-CU-LABEL: global_system_monotonic_load: 249; GFX10-CU: ; %bb.0: ; %entry 250; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 251; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 252; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 253; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 254; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 255; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 256; GFX10-CU-NEXT: 
global_store_dword v0, v1, s[4:5] 257; GFX10-CU-NEXT: s_endpgm 258; 259; SKIP-CACHE-INV-LABEL: global_system_monotonic_load: 260; SKIP-CACHE-INV: ; %bb.0: ; %entry 261; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 262; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 263; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 264; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 265; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 266; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 267; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 268; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 269; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 270; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 271; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 272; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 273; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 274; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 275; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 276; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 277; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 278; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 279; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc 280; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 281; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 282; SKIP-CACHE-INV-NEXT: s_endpgm 283; 284; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_load: 285; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 286; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 287; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 288; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 289; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 290; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 291; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 292; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 293; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 294; 295; GFX90A-TGSPLIT-LABEL: global_system_monotonic_load: 296; GFX90A-TGSPLIT: ; 
%bb.0: ; %entry 297; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 298; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 299; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 300; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 301; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 302; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 303; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 304; GFX90A-TGSPLIT-NEXT: s_endpgm 305; 306; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_load: 307; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 308; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 309; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 310; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 311; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 312; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 313; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 314; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 315; GFX940-NOTTGSPLIT-NEXT: s_endpgm 316; 317; GFX940-TGSPLIT-LABEL: global_system_monotonic_load: 318; GFX940-TGSPLIT: ; %bb.0: ; %entry 319; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 320; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 321; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 322; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 323; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 324; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 325; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 326; GFX940-TGSPLIT-NEXT: s_endpgm 327; 328; GFX11-WGP-LABEL: global_system_monotonic_load: 329; GFX11-WGP: ; %bb.0: ; %entry 330; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 331; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 332; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 333; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 334; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] glc 335; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 336; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 337; GFX11-WGP-NEXT: s_endpgm 338; 339; 
GFX11-CU-LABEL: global_system_monotonic_load: 340; GFX11-CU: ; %bb.0: ; %entry 341; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 342; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 343; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 344; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 345; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] glc 346; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 347; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 348; GFX11-CU-NEXT: s_endpgm 349; 350; GFX12-WGP-LABEL: global_system_monotonic_load: 351; GFX12-WGP: ; %bb.0: ; %entry 352; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 353; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 354; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 355; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 356; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS 357; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 358; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 359; GFX12-WGP-NEXT: s_endpgm 360; 361; GFX12-CU-LABEL: global_system_monotonic_load: 362; GFX12-CU: ; %bb.0: ; %entry 363; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 364; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 365; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 366; GFX12-CU-NEXT: s_wait_kmcnt 0x0 367; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS 368; GFX12-CU-NEXT: s_wait_loadcnt 0x0 369; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 370; GFX12-CU-NEXT: s_endpgm 371 ptr addrspace(1) %in, ptr addrspace(1) %out) { 372entry: 373 %val = load atomic i32, ptr addrspace(1) %in monotonic, align 4 374 store i32 %val, ptr addrspace(1) %out 375 ret void 376} 377 378define amdgpu_kernel void @global_system_acquire_load( 379; GFX6-LABEL: global_system_acquire_load: 380; GFX6: ; %bb.0: ; %entry 381; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 382; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 383; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 384; GFX6-NEXT: s_waitcnt lgkmcnt(0) 385; GFX6-NEXT: s_mov_b32 s6, s9 386; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 387; GFX6-NEXT: s_mov_b32 s12, 
0x100f000 388; GFX6-NEXT: s_mov_b32 s13, -1 389; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 390; GFX6-NEXT: s_mov_b32 s9, s6 391; GFX6-NEXT: s_mov_b32 s10, s13 392; GFX6-NEXT: s_mov_b32 s11, s12 393; GFX6-NEXT: s_mov_b32 s14, s5 394; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 395; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 396; GFX6-NEXT: s_mov_b32 s5, s14 397; GFX6-NEXT: s_mov_b32 s6, s13 398; GFX6-NEXT: s_mov_b32 s7, s12 399; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 glc 400; GFX6-NEXT: s_waitcnt vmcnt(0) 401; GFX6-NEXT: buffer_wbinvl1 402; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 403; GFX6-NEXT: s_endpgm 404; 405; GFX7-LABEL: global_system_acquire_load: 406; GFX7: ; %bb.0: ; %entry 407; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 408; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 409; GFX7-NEXT: s_waitcnt lgkmcnt(0) 410; GFX7-NEXT: v_mov_b32_e32 v0, s6 411; GFX7-NEXT: v_mov_b32_e32 v1, s7 412; GFX7-NEXT: flat_load_dword v2, v[0:1] glc 413; GFX7-NEXT: s_waitcnt vmcnt(0) 414; GFX7-NEXT: buffer_wbinvl1_vol 415; GFX7-NEXT: v_mov_b32_e32 v0, s4 416; GFX7-NEXT: v_mov_b32_e32 v1, s5 417; GFX7-NEXT: flat_store_dword v[0:1], v2 418; GFX7-NEXT: s_endpgm 419; 420; GFX10-WGP-LABEL: global_system_acquire_load: 421; GFX10-WGP: ; %bb.0: ; %entry 422; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 423; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 424; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 425; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 426; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 427; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 428; GFX10-WGP-NEXT: buffer_gl1_inv 429; GFX10-WGP-NEXT: buffer_gl0_inv 430; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 431; GFX10-WGP-NEXT: s_endpgm 432; 433; GFX10-CU-LABEL: global_system_acquire_load: 434; GFX10-CU: ; %bb.0: ; %entry 435; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 436; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 437; 
GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 438; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 439; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 440; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 441; GFX10-CU-NEXT: buffer_gl1_inv 442; GFX10-CU-NEXT: buffer_gl0_inv 443; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 444; GFX10-CU-NEXT: s_endpgm 445; 446; SKIP-CACHE-INV-LABEL: global_system_acquire_load: 447; SKIP-CACHE-INV: ; %bb.0: ; %entry 448; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 449; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 450; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 451; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 452; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 453; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 454; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 455; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 456; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 457; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 458; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 459; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 460; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 461; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 462; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 463; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 464; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 465; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 466; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc 467; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 468; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 469; SKIP-CACHE-INV-NEXT: s_endpgm 470; 471; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_load: 472; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 473; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 474; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 475; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 476; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 477; 
GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 478; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 479; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 480; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 481; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 482; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 483; 484; GFX90A-TGSPLIT-LABEL: global_system_acquire_load: 485; GFX90A-TGSPLIT: ; %bb.0: ; %entry 486; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 487; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 488; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 489; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 490; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 491; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 492; GFX90A-TGSPLIT-NEXT: buffer_invl2 493; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 494; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 495; GFX90A-TGSPLIT-NEXT: s_endpgm 496; 497; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_load: 498; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 499; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 500; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 501; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 502; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 503; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 504; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 505; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 506; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 507; GFX940-NOTTGSPLIT-NEXT: s_endpgm 508; 509; GFX940-TGSPLIT-LABEL: global_system_acquire_load: 510; GFX940-TGSPLIT: ; %bb.0: ; %entry 511; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 512; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 513; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 514; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 515; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 516; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 517; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 518; 
GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 519; GFX940-TGSPLIT-NEXT: s_endpgm 520; 521; GFX11-WGP-LABEL: global_system_acquire_load: 522; GFX11-WGP: ; %bb.0: ; %entry 523; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 524; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 525; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 526; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 527; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] glc 528; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 529; GFX11-WGP-NEXT: buffer_gl1_inv 530; GFX11-WGP-NEXT: buffer_gl0_inv 531; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 532; GFX11-WGP-NEXT: s_endpgm 533; 534; GFX11-CU-LABEL: global_system_acquire_load: 535; GFX11-CU: ; %bb.0: ; %entry 536; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 537; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 538; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 539; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 540; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] glc 541; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 542; GFX11-CU-NEXT: buffer_gl1_inv 543; GFX11-CU-NEXT: buffer_gl0_inv 544; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 545; GFX11-CU-NEXT: s_endpgm 546; 547; GFX12-WGP-LABEL: global_system_acquire_load: 548; GFX12-WGP: ; %bb.0: ; %entry 549; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 550; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 551; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 552; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 553; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS 554; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 555; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 556; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 557; GFX12-WGP-NEXT: s_endpgm 558; 559; GFX12-CU-LABEL: global_system_acquire_load: 560; GFX12-CU: ; %bb.0: ; %entry 561; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 562; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 563; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 564; GFX12-CU-NEXT: s_wait_kmcnt 0x0 565; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS 566; 
GFX12-CU-NEXT: s_wait_loadcnt 0x0 567; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 568; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 569; GFX12-CU-NEXT: s_endpgm 570 ptr addrspace(1) %in, ptr addrspace(1) %out) { 571entry: 572 %val = load atomic i32, ptr addrspace(1) %in acquire, align 4 573 store i32 %val, ptr addrspace(1) %out 574 ret void 575} 576 577define amdgpu_kernel void @global_system_seq_cst_load( 578; GFX6-LABEL: global_system_seq_cst_load: 579; GFX6: ; %bb.0: ; %entry 580; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 581; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 582; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 583; GFX6-NEXT: s_waitcnt lgkmcnt(0) 584; GFX6-NEXT: s_mov_b32 s6, s9 585; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 586; GFX6-NEXT: s_mov_b32 s12, 0x100f000 587; GFX6-NEXT: s_mov_b32 s13, -1 588; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 589; GFX6-NEXT: s_mov_b32 s9, s6 590; GFX6-NEXT: s_mov_b32 s10, s13 591; GFX6-NEXT: s_mov_b32 s11, s12 592; GFX6-NEXT: s_mov_b32 s14, s5 593; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 594; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 595; GFX6-NEXT: s_mov_b32 s5, s14 596; GFX6-NEXT: s_mov_b32 s6, s13 597; GFX6-NEXT: s_mov_b32 s7, s12 598; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 599; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 glc 600; GFX6-NEXT: s_waitcnt vmcnt(0) 601; GFX6-NEXT: buffer_wbinvl1 602; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 603; GFX6-NEXT: s_endpgm 604; 605; GFX7-LABEL: global_system_seq_cst_load: 606; GFX7: ; %bb.0: ; %entry 607; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 608; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 609; GFX7-NEXT: s_waitcnt lgkmcnt(0) 610; GFX7-NEXT: v_mov_b32_e32 v0, s6 611; GFX7-NEXT: v_mov_b32_e32 v1, s7 612; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 613; GFX7-NEXT: flat_load_dword v2, v[0:1] glc 614; GFX7-NEXT: s_waitcnt vmcnt(0) 615; GFX7-NEXT: 
buffer_wbinvl1_vol 616; GFX7-NEXT: v_mov_b32_e32 v0, s4 617; GFX7-NEXT: v_mov_b32_e32 v1, s5 618; GFX7-NEXT: flat_store_dword v[0:1], v2 619; GFX7-NEXT: s_endpgm 620; 621; GFX10-WGP-LABEL: global_system_seq_cst_load: 622; GFX10-WGP: ; %bb.0: ; %entry 623; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 624; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 625; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 626; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 627; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 628; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 629; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 630; GFX10-WGP-NEXT: buffer_gl1_inv 631; GFX10-WGP-NEXT: buffer_gl0_inv 632; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 633; GFX10-WGP-NEXT: s_endpgm 634; 635; GFX10-CU-LABEL: global_system_seq_cst_load: 636; GFX10-CU: ; %bb.0: ; %entry 637; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 638; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 639; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 640; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 641; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 642; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 643; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 644; GFX10-CU-NEXT: buffer_gl1_inv 645; GFX10-CU-NEXT: buffer_gl0_inv 646; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 647; GFX10-CU-NEXT: s_endpgm 648; 649; SKIP-CACHE-INV-LABEL: global_system_seq_cst_load: 650; SKIP-CACHE-INV: ; %bb.0: ; %entry 651; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 652; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 653; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 654; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 655; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 656; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 657; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 658; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 659; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 660; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s5, s2 661; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 662; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 663; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 664; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 665; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 666; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 667; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 668; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 669; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 670; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc 671; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 672; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 673; SKIP-CACHE-INV-NEXT: s_endpgm 674; 675; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_load: 676; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 677; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 678; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 679; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 680; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 681; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 682; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 683; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 684; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 685; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 686; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 687; 688; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_load: 689; GFX90A-TGSPLIT: ; %bb.0: ; %entry 690; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 691; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 692; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 693; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 694; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 695; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 696; GFX90A-TGSPLIT-NEXT: buffer_invl2 697; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 698; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 699; GFX90A-TGSPLIT-NEXT: s_endpgm 700; 701; 
GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_load: 702; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 703; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 704; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 705; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 706; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 707; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 708; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 709; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 710; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 711; GFX940-NOTTGSPLIT-NEXT: s_endpgm 712; 713; GFX940-TGSPLIT-LABEL: global_system_seq_cst_load: 714; GFX940-TGSPLIT: ; %bb.0: ; %entry 715; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 716; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 717; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 718; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 719; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 720; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 721; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 722; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 723; GFX940-TGSPLIT-NEXT: s_endpgm 724; 725; GFX11-WGP-LABEL: global_system_seq_cst_load: 726; GFX11-WGP: ; %bb.0: ; %entry 727; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 728; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 729; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 730; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 731; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 732; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] glc 733; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 734; GFX11-WGP-NEXT: buffer_gl1_inv 735; GFX11-WGP-NEXT: buffer_gl0_inv 736; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 737; GFX11-WGP-NEXT: s_endpgm 738; 739; GFX11-CU-LABEL: global_system_seq_cst_load: 740; GFX11-CU: ; %bb.0: ; %entry 741; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 742; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 743; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 
0x8 744; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 745; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 746; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] glc 747; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 748; GFX11-CU-NEXT: buffer_gl1_inv 749; GFX11-CU-NEXT: buffer_gl0_inv 750; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 751; GFX11-CU-NEXT: s_endpgm 752; 753; GFX12-WGP-LABEL: global_system_seq_cst_load: 754; GFX12-WGP: ; %bb.0: ; %entry 755; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 756; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 757; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 758; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 759; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 760; GFX12-WGP-NEXT: s_wait_storecnt 0x0 761; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 762; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 763; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS 764; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 765; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 766; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 767; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 768; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 769; GFX12-WGP-NEXT: s_endpgm 770; 771; GFX12-CU-LABEL: global_system_seq_cst_load: 772; GFX12-CU: ; %bb.0: ; %entry 773; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 774; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 775; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 776; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 777; GFX12-CU-NEXT: s_wait_samplecnt 0x0 778; GFX12-CU-NEXT: s_wait_storecnt 0x0 779; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 780; GFX12-CU-NEXT: s_wait_kmcnt 0x0 781; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS 782; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 783; GFX12-CU-NEXT: s_wait_samplecnt 0x0 784; GFX12-CU-NEXT: s_wait_loadcnt 0x0 785; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 786; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 787; GFX12-CU-NEXT: s_endpgm 788 ptr addrspace(1) %in, ptr addrspace(1) %out) { 789entry: 790 %val = load atomic i32, ptr addrspace(1) %in seq_cst, align 4 791 store i32 %val, ptr 
addrspace(1) %out 792 ret void 793} 794 795define amdgpu_kernel void @global_system_unordered_store( 796; GFX6-LABEL: global_system_unordered_store: 797; GFX6: ; %bb.0: ; %entry 798; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 799; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 800; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 801; GFX6-NEXT: s_waitcnt lgkmcnt(0) 802; GFX6-NEXT: s_mov_b32 s11, s5 803; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 804; GFX6-NEXT: s_mov_b32 s9, 0x100f000 805; GFX6-NEXT: s_mov_b32 s10, -1 806; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 807; GFX6-NEXT: s_mov_b32 s5, s11 808; GFX6-NEXT: s_mov_b32 s6, s10 809; GFX6-NEXT: s_mov_b32 s7, s9 810; GFX6-NEXT: v_mov_b32_e32 v0, s8 811; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 812; GFX6-NEXT: s_endpgm 813; 814; GFX7-LABEL: global_system_unordered_store: 815; GFX7: ; %bb.0: ; %entry 816; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 817; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 818; GFX7-NEXT: s_waitcnt lgkmcnt(0) 819; GFX7-NEXT: v_mov_b32_e32 v0, s6 820; GFX7-NEXT: v_mov_b32_e32 v1, s7 821; GFX7-NEXT: v_mov_b32_e32 v2, s4 822; GFX7-NEXT: flat_store_dword v[0:1], v2 823; GFX7-NEXT: s_endpgm 824; 825; GFX10-WGP-LABEL: global_system_unordered_store: 826; GFX10-WGP: ; %bb.0: ; %entry 827; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 828; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 829; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 830; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 831; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 832; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 833; GFX10-WGP-NEXT: s_endpgm 834; 835; GFX10-CU-LABEL: global_system_unordered_store: 836; GFX10-CU: ; %bb.0: ; %entry 837; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 838; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 839; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 840; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 841; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 842; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 
843; GFX10-CU-NEXT: s_endpgm 844; 845; SKIP-CACHE-INV-LABEL: global_system_unordered_store: 846; SKIP-CACHE-INV: ; %bb.0: ; %entry 847; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 848; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 849; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 850; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 851; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 852; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 853; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 854; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 855; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 856; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 857; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 858; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 859; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 860; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 861; SKIP-CACHE-INV-NEXT: s_endpgm 862; 863; GFX90A-NOTTGSPLIT-LABEL: global_system_unordered_store: 864; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 865; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 866; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 867; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 868; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 869; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 870; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 871; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 872; 873; GFX90A-TGSPLIT-LABEL: global_system_unordered_store: 874; GFX90A-TGSPLIT: ; %bb.0: ; %entry 875; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 876; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 877; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 878; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 879; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 880; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 881; GFX90A-TGSPLIT-NEXT: s_endpgm 882; 883; GFX940-NOTTGSPLIT-LABEL: global_system_unordered_store: 884; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 885; GFX940-NOTTGSPLIT-NEXT: 
s_load_dword s2, s[4:5], 0x0 886; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 887; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 888; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 889; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 890; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 891; GFX940-NOTTGSPLIT-NEXT: s_endpgm 892; 893; GFX940-TGSPLIT-LABEL: global_system_unordered_store: 894; GFX940-TGSPLIT: ; %bb.0: ; %entry 895; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 896; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 897; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 898; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 899; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 900; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 901; GFX940-TGSPLIT-NEXT: s_endpgm 902; 903; GFX11-WGP-LABEL: global_system_unordered_store: 904; GFX11-WGP: ; %bb.0: ; %entry 905; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 906; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 907; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 908; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 909; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 910; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 911; GFX11-WGP-NEXT: s_endpgm 912; 913; GFX11-CU-LABEL: global_system_unordered_store: 914; GFX11-CU: ; %bb.0: ; %entry 915; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 916; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 917; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 918; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 919; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 920; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 921; GFX11-CU-NEXT: s_endpgm 922; 923; GFX12-WGP-LABEL: global_system_unordered_store: 924; GFX12-WGP: ; %bb.0: ; %entry 925; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 926; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 927; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 928; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 929; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 930; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 931; GFX12-WGP-NEXT: s_endpgm 
932; 933; GFX12-CU-LABEL: global_system_unordered_store: 934; GFX12-CU: ; %bb.0: ; %entry 935; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 936; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 937; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 938; GFX12-CU-NEXT: s_wait_kmcnt 0x0 939; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 940; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 941; GFX12-CU-NEXT: s_endpgm 942 i32 %in, ptr addrspace(1) %out) { 943entry: 944 store atomic i32 %in, ptr addrspace(1) %out unordered, align 4 945 ret void 946} 947 948define amdgpu_kernel void @global_system_monotonic_store( 949; GFX6-LABEL: global_system_monotonic_store: 950; GFX6: ; %bb.0: ; %entry 951; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 952; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 953; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 954; GFX6-NEXT: s_waitcnt lgkmcnt(0) 955; GFX6-NEXT: s_mov_b32 s11, s5 956; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 957; GFX6-NEXT: s_mov_b32 s9, 0x100f000 958; GFX6-NEXT: s_mov_b32 s10, -1 959; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 960; GFX6-NEXT: s_mov_b32 s5, s11 961; GFX6-NEXT: s_mov_b32 s6, s10 962; GFX6-NEXT: s_mov_b32 s7, s9 963; GFX6-NEXT: v_mov_b32_e32 v0, s8 964; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 965; GFX6-NEXT: s_endpgm 966; 967; GFX7-LABEL: global_system_monotonic_store: 968; GFX7: ; %bb.0: ; %entry 969; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 970; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 971; GFX7-NEXT: s_waitcnt lgkmcnt(0) 972; GFX7-NEXT: v_mov_b32_e32 v0, s6 973; GFX7-NEXT: v_mov_b32_e32 v1, s7 974; GFX7-NEXT: v_mov_b32_e32 v2, s4 975; GFX7-NEXT: flat_store_dword v[0:1], v2 976; GFX7-NEXT: s_endpgm 977; 978; GFX10-WGP-LABEL: global_system_monotonic_store: 979; GFX10-WGP: ; %bb.0: ; %entry 980; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 981; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 982; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 983; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 984; 
GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 985; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 986; GFX10-WGP-NEXT: s_endpgm 987; 988; GFX10-CU-LABEL: global_system_monotonic_store: 989; GFX10-CU: ; %bb.0: ; %entry 990; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 991; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 992; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 993; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 994; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 995; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 996; GFX10-CU-NEXT: s_endpgm 997; 998; SKIP-CACHE-INV-LABEL: global_system_monotonic_store: 999; SKIP-CACHE-INV: ; %bb.0: ; %entry 1000; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 1001; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 1002; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 1003; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1004; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1005; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1006; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1007; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1008; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1009; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1010; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1011; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1012; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1013; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 1014; SKIP-CACHE-INV-NEXT: s_endpgm 1015; 1016; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_store: 1017; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1018; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1019; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1020; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1021; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1022; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1023; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1024; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1025; 1026; GFX90A-TGSPLIT-LABEL: global_system_monotonic_store: 1027; 
GFX90A-TGSPLIT: ; %bb.0: ; %entry 1028; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1029; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1030; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1031; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1032; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1033; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1034; GFX90A-TGSPLIT-NEXT: s_endpgm 1035; 1036; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_store: 1037; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1038; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1039; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1040; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1041; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1042; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1043; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1044; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1045; 1046; GFX940-TGSPLIT-LABEL: global_system_monotonic_store: 1047; GFX940-TGSPLIT: ; %bb.0: ; %entry 1048; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1049; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1050; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1051; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1052; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1053; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1054; GFX940-TGSPLIT-NEXT: s_endpgm 1055; 1056; GFX11-WGP-LABEL: global_system_monotonic_store: 1057; GFX11-WGP: ; %bb.0: ; %entry 1058; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1059; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1060; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1061; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1062; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1063; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 1064; GFX11-WGP-NEXT: s_endpgm 1065; 1066; GFX11-CU-LABEL: global_system_monotonic_store: 1067; GFX11-CU: ; %bb.0: ; %entry 1068; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1069; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1070; GFX11-CU-NEXT: v_mov_b32_e32 
v0, 0 1071; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1072; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1073; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 1074; GFX11-CU-NEXT: s_endpgm 1075; 1076; GFX12-WGP-LABEL: global_system_monotonic_store: 1077; GFX12-WGP: ; %bb.0: ; %entry 1078; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1079; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1080; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1081; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1082; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1083; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS 1084; GFX12-WGP-NEXT: s_endpgm 1085; 1086; GFX12-CU-LABEL: global_system_monotonic_store: 1087; GFX12-CU: ; %bb.0: ; %entry 1088; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1089; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1090; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1091; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1092; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1093; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS 1094; GFX12-CU-NEXT: s_endpgm 1095 i32 %in, ptr addrspace(1) %out) { 1096entry: 1097 store atomic i32 %in, ptr addrspace(1) %out monotonic, align 4 1098 ret void 1099} 1100 1101define amdgpu_kernel void @global_system_release_store( 1102; GFX6-LABEL: global_system_release_store: 1103; GFX6: ; %bb.0: ; %entry 1104; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 1105; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 1106; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 1107; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1108; GFX6-NEXT: s_mov_b32 s11, s5 1109; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1110; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1111; GFX6-NEXT: s_mov_b32 s10, -1 1112; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1113; GFX6-NEXT: s_mov_b32 s5, s11 1114; GFX6-NEXT: s_mov_b32 s6, s10 1115; GFX6-NEXT: s_mov_b32 s7, s9 1116; GFX6-NEXT: v_mov_b32_e32 v0, s8 1117; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1118; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 1119; GFX6-NEXT: s_endpgm 
1120; 1121; GFX7-LABEL: global_system_release_store: 1122; GFX7: ; %bb.0: ; %entry 1123; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 1124; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 1125; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1126; GFX7-NEXT: v_mov_b32_e32 v0, s6 1127; GFX7-NEXT: v_mov_b32_e32 v1, s7 1128; GFX7-NEXT: v_mov_b32_e32 v2, s4 1129; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1130; GFX7-NEXT: flat_store_dword v[0:1], v2 1131; GFX7-NEXT: s_endpgm 1132; 1133; GFX10-WGP-LABEL: global_system_release_store: 1134; GFX10-WGP: ; %bb.0: ; %entry 1135; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 1136; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1137; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1138; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1139; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1140; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1141; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1142; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 1143; GFX10-WGP-NEXT: s_endpgm 1144; 1145; GFX10-CU-LABEL: global_system_release_store: 1146; GFX10-CU: ; %bb.0: ; %entry 1147; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 1148; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1149; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1150; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1151; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1152; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1153; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1154; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 1155; GFX10-CU-NEXT: s_endpgm 1156; 1157; SKIP-CACHE-INV-LABEL: global_system_release_store: 1158; SKIP-CACHE-INV: ; %bb.0: ; %entry 1159; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 1160; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 1161; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 1162; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1163; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1164; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1165; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1166; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s6, -1 1167; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1168; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1169; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1170; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1171; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1172; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1173; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 1174; SKIP-CACHE-INV-NEXT: s_endpgm 1175; 1176; GFX90A-NOTTGSPLIT-LABEL: global_system_release_store: 1177; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1178; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1179; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1180; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1181; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1182; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1183; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 1184; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1185; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1186; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1187; 1188; GFX90A-TGSPLIT-LABEL: global_system_release_store: 1189; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1190; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1191; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1192; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1193; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1194; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1195; GFX90A-TGSPLIT-NEXT: buffer_wbl2 1196; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1197; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1198; GFX90A-TGSPLIT-NEXT: s_endpgm 1199; 1200; GFX940-NOTTGSPLIT-LABEL: global_system_release_store: 1201; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1202; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1203; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1204; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1205; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1206; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1207; 
GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1208; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1209; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1210; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1211; 1212; GFX940-TGSPLIT-LABEL: global_system_release_store: 1213; GFX940-TGSPLIT: ; %bb.0: ; %entry 1214; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1215; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1216; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1217; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1218; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1219; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1220; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1221; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1222; GFX940-TGSPLIT-NEXT: s_endpgm 1223; 1224; GFX11-WGP-LABEL: global_system_release_store: 1225; GFX11-WGP: ; %bb.0: ; %entry 1226; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1227; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1228; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1229; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1230; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1231; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1232; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1233; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 1234; GFX11-WGP-NEXT: s_endpgm 1235; 1236; GFX11-CU-LABEL: global_system_release_store: 1237; GFX11-CU: ; %bb.0: ; %entry 1238; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1239; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1240; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1241; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1242; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1243; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1244; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1245; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 1246; GFX11-CU-NEXT: s_endpgm 1247; 1248; GFX12-WGP-LABEL: global_system_release_store: 1249; GFX12-WGP: ; %bb.0: ; %entry 1250; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1251; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1252; 
GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1253; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1254; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1255; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 1256; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 1257; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 1258; GFX12-WGP-NEXT: s_wait_storecnt 0x0 1259; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 1260; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS 1261; GFX12-WGP-NEXT: s_endpgm 1262; 1263; GFX12-CU-LABEL: global_system_release_store: 1264; GFX12-CU: ; %bb.0: ; %entry 1265; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1266; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1267; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1268; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1269; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1270; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 1271; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 1272; GFX12-CU-NEXT: s_wait_samplecnt 0x0 1273; GFX12-CU-NEXT: s_wait_storecnt 0x0 1274; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 1275; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS 1276; GFX12-CU-NEXT: s_endpgm 1277 i32 %in, ptr addrspace(1) %out) { 1278entry: 1279 store atomic i32 %in, ptr addrspace(1) %out release, align 4 1280 ret void 1281} 1282 1283define amdgpu_kernel void @global_system_seq_cst_store( 1284; GFX6-LABEL: global_system_seq_cst_store: 1285; GFX6: ; %bb.0: ; %entry 1286; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 1287; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 1288; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 1289; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1290; GFX6-NEXT: s_mov_b32 s11, s5 1291; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1292; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1293; GFX6-NEXT: s_mov_b32 s10, -1 1294; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1295; GFX6-NEXT: s_mov_b32 s5, s11 1296; GFX6-NEXT: s_mov_b32 s6, s10 1297; GFX6-NEXT: s_mov_b32 s7, s9 1298; GFX6-NEXT: v_mov_b32_e32 v0, s8 1299; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1300; GFX6-NEXT: 
buffer_store_dword v0, off, s[4:7], 0 1301; GFX6-NEXT: s_endpgm 1302; 1303; GFX7-LABEL: global_system_seq_cst_store: 1304; GFX7: ; %bb.0: ; %entry 1305; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 1306; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 1307; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1308; GFX7-NEXT: v_mov_b32_e32 v0, s6 1309; GFX7-NEXT: v_mov_b32_e32 v1, s7 1310; GFX7-NEXT: v_mov_b32_e32 v2, s4 1311; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1312; GFX7-NEXT: flat_store_dword v[0:1], v2 1313; GFX7-NEXT: s_endpgm 1314; 1315; GFX10-WGP-LABEL: global_system_seq_cst_store: 1316; GFX10-WGP: ; %bb.0: ; %entry 1317; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 1318; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1319; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1320; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1321; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1322; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1323; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1324; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 1325; GFX10-WGP-NEXT: s_endpgm 1326; 1327; GFX10-CU-LABEL: global_system_seq_cst_store: 1328; GFX10-CU: ; %bb.0: ; %entry 1329; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 1330; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1331; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1332; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1333; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1334; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1335; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1336; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 1337; GFX10-CU-NEXT: s_endpgm 1338; 1339; SKIP-CACHE-INV-LABEL: global_system_seq_cst_store: 1340; SKIP-CACHE-INV: ; %bb.0: ; %entry 1341; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 1342; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 1343; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 1344; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1345; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1346; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1347; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1348; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1349; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1350; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1351; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1352; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1353; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1354; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1355; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 1356; SKIP-CACHE-INV-NEXT: s_endpgm 1357; 1358; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_store: 1359; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1360; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1361; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1362; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1363; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1364; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1365; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 1366; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1367; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1368; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1369; 1370; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_store: 1371; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1372; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1373; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1374; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1375; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1376; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1377; GFX90A-TGSPLIT-NEXT: buffer_wbl2 1378; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1379; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1380; GFX90A-TGSPLIT-NEXT: s_endpgm 1381; 1382; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_store: 1383; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1384; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1385; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1386; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1387; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1388; 
GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1389; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1390; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1391; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1392; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1393; 1394; GFX940-TGSPLIT-LABEL: global_system_seq_cst_store: 1395; GFX940-TGSPLIT: ; %bb.0: ; %entry 1396; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1397; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1398; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1399; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1400; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1401; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1402; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1403; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1404; GFX940-TGSPLIT-NEXT: s_endpgm 1405; 1406; GFX11-WGP-LABEL: global_system_seq_cst_store: 1407; GFX11-WGP: ; %bb.0: ; %entry 1408; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1409; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1410; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1411; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1412; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1413; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1414; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1415; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 1416; GFX11-WGP-NEXT: s_endpgm 1417; 1418; GFX11-CU-LABEL: global_system_seq_cst_store: 1419; GFX11-CU: ; %bb.0: ; %entry 1420; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1421; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1422; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1423; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1424; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1425; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1426; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1427; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 1428; GFX11-CU-NEXT: s_endpgm 1429; 1430; GFX12-WGP-LABEL: global_system_seq_cst_store: 1431; GFX12-WGP: ; %bb.0: ; %entry 1432; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1433; 
GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1434; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1435; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1436; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1437; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 1438; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 1439; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 1440; GFX12-WGP-NEXT: s_wait_storecnt 0x0 1441; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 1442; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS 1443; GFX12-WGP-NEXT: s_endpgm 1444; 1445; GFX12-CU-LABEL: global_system_seq_cst_store: 1446; GFX12-CU: ; %bb.0: ; %entry 1447; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1448; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1449; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1450; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1451; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1452; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 1453; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 1454; GFX12-CU-NEXT: s_wait_samplecnt 0x0 1455; GFX12-CU-NEXT: s_wait_storecnt 0x0 1456; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 1457; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS 1458; GFX12-CU-NEXT: s_endpgm 1459 i32 %in, ptr addrspace(1) %out) { 1460entry: 1461 store atomic i32 %in, ptr addrspace(1) %out seq_cst, align 4 1462 ret void 1463} 1464 1465define amdgpu_kernel void @global_system_monotonic_atomicrmw( 1466; GFX6-LABEL: global_system_monotonic_atomicrmw: 1467; GFX6: ; %bb.0: ; %entry 1468; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1469; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 1470; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1471; GFX6-NEXT: s_mov_b32 s11, s5 1472; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1473; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1474; GFX6-NEXT: s_mov_b32 s10, -1 1475; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1476; GFX6-NEXT: s_mov_b32 s5, s11 1477; GFX6-NEXT: s_mov_b32 s6, s10 1478; GFX6-NEXT: s_mov_b32 s7, s9 1479; GFX6-NEXT: v_mov_b32_e32 v0, s8 1480; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 
0 1481; GFX6-NEXT: s_endpgm 1482; 1483; GFX7-LABEL: global_system_monotonic_atomicrmw: 1484; GFX7: ; %bb.0: ; %entry 1485; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1486; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1487; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1488; GFX7-NEXT: v_mov_b32_e32 v0, s6 1489; GFX7-NEXT: v_mov_b32_e32 v1, s7 1490; GFX7-NEXT: v_mov_b32_e32 v2, s4 1491; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1492; GFX7-NEXT: s_endpgm 1493; 1494; GFX10-WGP-LABEL: global_system_monotonic_atomicrmw: 1495; GFX10-WGP: ; %bb.0: ; %entry 1496; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1497; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1498; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 1499; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1500; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1501; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 1502; GFX10-WGP-NEXT: s_endpgm 1503; 1504; GFX10-CU-LABEL: global_system_monotonic_atomicrmw: 1505; GFX10-CU: ; %bb.0: ; %entry 1506; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1507; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1508; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 1509; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1510; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1511; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 1512; GFX10-CU-NEXT: s_endpgm 1513; 1514; SKIP-CACHE-INV-LABEL: global_system_monotonic_atomicrmw: 1515; SKIP-CACHE-INV: ; %bb.0: ; %entry 1516; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1517; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 1518; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1519; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1520; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1521; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1522; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1523; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1524; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1525; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1526; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1527; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1528; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 1529; SKIP-CACHE-INV-NEXT: s_endpgm 1530; 1531; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_atomicrmw: 1532; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1533; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1534; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1535; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1536; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1537; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1538; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1539; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1540; 1541; GFX90A-TGSPLIT-LABEL: global_system_monotonic_atomicrmw: 1542; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1543; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1544; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1545; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1546; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1547; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1548; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1549; GFX90A-TGSPLIT-NEXT: s_endpgm 1550; 1551; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_atomicrmw: 1552; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1553; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1554; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1555; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1556; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1557; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1558; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 1559; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1560; 1561; GFX940-TGSPLIT-LABEL: global_system_monotonic_atomicrmw: 1562; GFX940-TGSPLIT: ; %bb.0: ; %entry 1563; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1564; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1565; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1566; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1567; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1568; GFX940-TGSPLIT-NEXT: 
global_atomic_swap v0, v1, s[0:1] sc1 1569; GFX940-TGSPLIT-NEXT: s_endpgm 1570; 1571; GFX11-WGP-LABEL: global_system_monotonic_atomicrmw: 1572; GFX11-WGP: ; %bb.0: ; %entry 1573; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1574; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1575; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1576; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1577; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1578; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1579; GFX11-WGP-NEXT: s_endpgm 1580; 1581; GFX11-CU-LABEL: global_system_monotonic_atomicrmw: 1582; GFX11-CU: ; %bb.0: ; %entry 1583; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1584; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1585; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1586; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1587; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1588; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1589; GFX11-CU-NEXT: s_endpgm 1590; 1591; GFX12-WGP-LABEL: global_system_monotonic_atomicrmw: 1592; GFX12-WGP: ; %bb.0: ; %entry 1593; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1594; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1595; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1596; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1597; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1598; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 1599; GFX12-WGP-NEXT: s_endpgm 1600; 1601; GFX12-CU-LABEL: global_system_monotonic_atomicrmw: 1602; GFX12-CU: ; %bb.0: ; %entry 1603; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1604; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1605; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1606; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1607; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1608; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 1609; GFX12-CU-NEXT: s_endpgm 1610 ptr addrspace(1) %out, i32 %in) { 1611entry: 1612 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in monotonic 1613 ret void 1614} 1615; Test case: volatile atomicrmw xchg with acquire ordering and no syncscope on a global (addrspace 1) pointer. The checks below expect the atomic swap to be followed by a wait and then cache invalidation (buffer_wbinvl1, buffer_wbinvl1_vol, buffer_gl1_inv plus buffer_gl0_inv, buffer_invl2 plus buffer_wbinvl1_vol, buffer_inv sc0 sc1, or global_inv scope:SCOPE_SYS, depending on the target); the run with -amdgcn-skip-cache-invalidations keeps the wait but has no invalidate. 1616define amdgpu_kernel void @global_system_acquire_atomicrmw( 1617; GFX6-LABEL: 
global_system_acquire_atomicrmw: 1618; GFX6: ; %bb.0: ; %entry 1619; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1620; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 1621; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1622; GFX6-NEXT: s_mov_b32 s11, s5 1623; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1624; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1625; GFX6-NEXT: s_mov_b32 s10, -1 1626; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1627; GFX6-NEXT: s_mov_b32 s5, s11 1628; GFX6-NEXT: s_mov_b32 s6, s10 1629; GFX6-NEXT: s_mov_b32 s7, s9 1630; GFX6-NEXT: v_mov_b32_e32 v0, s8 1631; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 1632; GFX6-NEXT: s_waitcnt vmcnt(0) 1633; GFX6-NEXT: buffer_wbinvl1 1634; GFX6-NEXT: s_endpgm 1635; 1636; GFX7-LABEL: global_system_acquire_atomicrmw: 1637; GFX7: ; %bb.0: ; %entry 1638; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1639; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1640; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1641; GFX7-NEXT: v_mov_b32_e32 v0, s6 1642; GFX7-NEXT: v_mov_b32_e32 v1, s7 1643; GFX7-NEXT: v_mov_b32_e32 v2, s4 1644; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1645; GFX7-NEXT: s_waitcnt vmcnt(0) 1646; GFX7-NEXT: buffer_wbinvl1_vol 1647; GFX7-NEXT: s_endpgm 1648; 1649; GFX10-WGP-LABEL: global_system_acquire_atomicrmw: 1650; GFX10-WGP: ; %bb.0: ; %entry 1651; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1652; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1653; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 1654; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1655; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1656; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 1657; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1658; GFX10-WGP-NEXT: buffer_gl1_inv 1659; GFX10-WGP-NEXT: buffer_gl0_inv 1660; GFX10-WGP-NEXT: s_endpgm 1661; 1662; GFX10-CU-LABEL: global_system_acquire_atomicrmw: 1663; GFX10-CU: ; %bb.0: ; %entry 1664; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1665; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1666; GFX10-CU-NEXT: s_load_dword 
s6, s[8:9], 0x8 1667; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1668; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1669; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 1670; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1671; GFX10-CU-NEXT: buffer_gl1_inv 1672; GFX10-CU-NEXT: buffer_gl0_inv 1673; GFX10-CU-NEXT: s_endpgm 1674; 1675; SKIP-CACHE-INV-LABEL: global_system_acquire_atomicrmw: 1676; SKIP-CACHE-INV: ; %bb.0: ; %entry 1677; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1678; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 1679; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1680; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1681; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1682; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1683; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1684; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1685; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1686; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1687; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1688; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1689; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 1690; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 1691; SKIP-CACHE-INV-NEXT: s_endpgm 1692; 1693; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_atomicrmw: 1694; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1695; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1696; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1697; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1698; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1699; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1700; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1701; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 1702; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 1703; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 1704; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1705; 1706; GFX90A-TGSPLIT-LABEL: global_system_acquire_atomicrmw: 1707; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1708; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 
1709; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1710; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1711; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1712; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1713; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1714; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1715; GFX90A-TGSPLIT-NEXT: buffer_invl2 1716; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 1717; GFX90A-TGSPLIT-NEXT: s_endpgm 1718; 1719; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_atomicrmw: 1720; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1721; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1722; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1723; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1724; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1725; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1726; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 1727; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 1728; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 1729; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1730; 1731; GFX940-TGSPLIT-LABEL: global_system_acquire_atomicrmw: 1732; GFX940-TGSPLIT: ; %bb.0: ; %entry 1733; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1734; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1735; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1736; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1737; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1738; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 1739; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1740; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 1741; GFX940-TGSPLIT-NEXT: s_endpgm 1742; 1743; GFX11-WGP-LABEL: global_system_acquire_atomicrmw: 1744; GFX11-WGP: ; %bb.0: ; %entry 1745; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1746; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1747; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1748; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1749; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1750; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1751; 
GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1752; GFX11-WGP-NEXT: buffer_gl1_inv 1753; GFX11-WGP-NEXT: buffer_gl0_inv 1754; GFX11-WGP-NEXT: s_endpgm 1755; 1756; GFX11-CU-LABEL: global_system_acquire_atomicrmw: 1757; GFX11-CU: ; %bb.0: ; %entry 1758; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1759; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1760; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1761; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1762; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1763; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1764; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1765; GFX11-CU-NEXT: buffer_gl1_inv 1766; GFX11-CU-NEXT: buffer_gl0_inv 1767; GFX11-CU-NEXT: s_endpgm 1768; 1769; GFX12-WGP-LABEL: global_system_acquire_atomicrmw: 1770; GFX12-WGP: ; %bb.0: ; %entry 1771; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1772; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1773; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1774; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1775; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1776; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 1777; GFX12-WGP-NEXT: s_wait_storecnt 0x0 1778; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 1779; GFX12-WGP-NEXT: s_endpgm 1780; 1781; GFX12-CU-LABEL: global_system_acquire_atomicrmw: 1782; GFX12-CU: ; %bb.0: ; %entry 1783; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1784; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1785; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1786; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1787; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1788; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 1789; GFX12-CU-NEXT: s_wait_storecnt 0x0 1790; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 1791; GFX12-CU-NEXT: s_endpgm 1792 ptr addrspace(1) %out, i32 %in) { 1793entry: 1794 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acquire 1795 ret void 1796} 1797; Test case: volatile atomicrmw xchg with release ordering and no syncscope on a global (addrspace 1) pointer. The checks below expect the ordering instructions before the atomic swap: wait instructions (s_waitcnt, s_waitcnt_vscnt or s_wait_*) on every target, preceded by a cache writeback on gfx90a and gfx940 (buffer_wbl2) and on gfx1200 (global_wb scope:SCOPE_SYS); only s_endpgm follows the swap. 1798define amdgpu_kernel void @global_system_release_atomicrmw( 1799; GFX6-LABEL: global_system_release_atomicrmw: 1800; GFX6: ; 
%entry 1801; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1802; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 1803; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1804; GFX6-NEXT: s_mov_b32 s11, s5 1805; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1806; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1807; GFX6-NEXT: s_mov_b32 s10, -1 1808; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1809; GFX6-NEXT: s_mov_b32 s5, s11 1810; GFX6-NEXT: s_mov_b32 s6, s10 1811; GFX6-NEXT: s_mov_b32 s7, s9 1812; GFX6-NEXT: v_mov_b32_e32 v0, s8 1813; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1814; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 1815; GFX6-NEXT: s_endpgm 1816; 1817; GFX7-LABEL: global_system_release_atomicrmw: 1818; GFX7: ; %bb.0: ; %entry 1819; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1820; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1821; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1822; GFX7-NEXT: v_mov_b32_e32 v0, s6 1823; GFX7-NEXT: v_mov_b32_e32 v1, s7 1824; GFX7-NEXT: v_mov_b32_e32 v2, s4 1825; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1826; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1827; GFX7-NEXT: s_endpgm 1828; 1829; GFX10-WGP-LABEL: global_system_release_atomicrmw: 1830; GFX10-WGP: ; %bb.0: ; %entry 1831; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1832; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1833; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 1834; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1835; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1836; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1837; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1838; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 1839; GFX10-WGP-NEXT: s_endpgm 1840; 1841; GFX10-CU-LABEL: global_system_release_atomicrmw: 1842; GFX10-CU: ; %bb.0: ; %entry 1843; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1844; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1845; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 1846; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1847; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1848; GFX10-CU-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 1849; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1850; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 1851; GFX10-CU-NEXT: s_endpgm 1852; 1853; SKIP-CACHE-INV-LABEL: global_system_release_atomicrmw: 1854; SKIP-CACHE-INV: ; %bb.0: ; %entry 1855; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1856; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 1857; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1858; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1859; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1860; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1861; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1862; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1863; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1864; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1865; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1866; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1867; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1868; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 1869; SKIP-CACHE-INV-NEXT: s_endpgm 1870; 1871; GFX90A-NOTTGSPLIT-LABEL: global_system_release_atomicrmw: 1872; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1873; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1874; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1875; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1876; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1877; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1878; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 1879; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1880; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1881; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1882; 1883; GFX90A-TGSPLIT-LABEL: global_system_release_atomicrmw: 1884; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1885; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1886; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1887; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1888; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1889; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1890; GFX90A-TGSPLIT-NEXT: buffer_wbl2 1891; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1892; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1893; GFX90A-TGSPLIT-NEXT: s_endpgm 1894; 1895; GFX940-NOTTGSPLIT-LABEL: global_system_release_atomicrmw: 1896; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1897; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1898; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1899; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1900; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1901; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1902; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1903; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1904; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 1905; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1906; 1907; GFX940-TGSPLIT-LABEL: global_system_release_atomicrmw: 1908; GFX940-TGSPLIT: ; %bb.0: ; %entry 1909; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1910; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1911; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1912; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1913; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1914; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1915; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1916; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 1917; GFX940-TGSPLIT-NEXT: s_endpgm 1918; 1919; GFX11-WGP-LABEL: global_system_release_atomicrmw: 1920; GFX11-WGP: ; %bb.0: ; %entry 1921; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1922; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1923; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1924; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1925; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1926; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1927; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1928; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1929; GFX11-WGP-NEXT: s_endpgm 1930; 1931; GFX11-CU-LABEL: global_system_release_atomicrmw: 1932; 
GFX11-CU: ; %bb.0: ; %entry 1933; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1934; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1935; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1936; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1937; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1938; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1939; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1940; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1941; GFX11-CU-NEXT: s_endpgm 1942; 1943; GFX12-WGP-LABEL: global_system_release_atomicrmw: 1944; GFX12-WGP: ; %bb.0: ; %entry 1945; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1946; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1947; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1948; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1949; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1950; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 1951; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 1952; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 1953; GFX12-WGP-NEXT: s_wait_storecnt 0x0 1954; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 1955; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 1956; GFX12-WGP-NEXT: s_endpgm 1957; 1958; GFX12-CU-LABEL: global_system_release_atomicrmw: 1959; GFX12-CU: ; %bb.0: ; %entry 1960; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1961; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1962; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1963; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1964; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1965; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 1966; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 1967; GFX12-CU-NEXT: s_wait_samplecnt 0x0 1968; GFX12-CU-NEXT: s_wait_storecnt 0x0 1969; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 1970; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 1971; GFX12-CU-NEXT: s_endpgm 1972 ptr addrspace(1) %out, i32 %in) { 1973entry: 1974 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in release 1975 ret void 1976} 1977; Test case: volatile atomicrmw xchg with acq_rel ordering and no syncscope on a global (addrspace 1) pointer. The checks below expect instructions on both sides of the atomic swap: wait instructions (plus buffer_wbl2 or global_wb on the targets that emit them) before it, and a wait followed by cache invalidation after it; the run with -amdgcn-skip-cache-invalidations has the waits but no invalidate. 1978define amdgpu_kernel void @global_system_acq_rel_atomicrmw( 1979; GFX6-LABEL: global_system_acq_rel_atomicrmw: 1980; 
GFX6: ; %bb.0: ; %entry 1981; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1982; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 1983; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1984; GFX6-NEXT: s_mov_b32 s11, s5 1985; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1986; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1987; GFX6-NEXT: s_mov_b32 s10, -1 1988; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1989; GFX6-NEXT: s_mov_b32 s5, s11 1990; GFX6-NEXT: s_mov_b32 s6, s10 1991; GFX6-NEXT: s_mov_b32 s7, s9 1992; GFX6-NEXT: v_mov_b32_e32 v0, s8 1993; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1994; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 1995; GFX6-NEXT: s_waitcnt vmcnt(0) 1996; GFX6-NEXT: buffer_wbinvl1 1997; GFX6-NEXT: s_endpgm 1998; 1999; GFX7-LABEL: global_system_acq_rel_atomicrmw: 2000; GFX7: ; %bb.0: ; %entry 2001; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 2002; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 2003; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2004; GFX7-NEXT: v_mov_b32_e32 v0, s6 2005; GFX7-NEXT: v_mov_b32_e32 v1, s7 2006; GFX7-NEXT: v_mov_b32_e32 v2, s4 2007; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2008; GFX7-NEXT: flat_atomic_swap v[0:1], v2 2009; GFX7-NEXT: s_waitcnt vmcnt(0) 2010; GFX7-NEXT: buffer_wbinvl1_vol 2011; GFX7-NEXT: s_endpgm 2012; 2013; GFX10-WGP-LABEL: global_system_acq_rel_atomicrmw: 2014; GFX10-WGP: ; %bb.0: ; %entry 2015; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2016; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2017; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2018; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2019; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 2020; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2021; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2022; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 2023; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2024; GFX10-WGP-NEXT: buffer_gl1_inv 2025; GFX10-WGP-NEXT: buffer_gl0_inv 2026; GFX10-WGP-NEXT: s_endpgm 2027; 2028; GFX10-CU-LABEL: global_system_acq_rel_atomicrmw: 2029; 
GFX10-CU: ; %bb.0: ; %entry 2030; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2031; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2032; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2033; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2034; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2035; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2036; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2037; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 2038; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2039; GFX10-CU-NEXT: buffer_gl1_inv 2040; GFX10-CU-NEXT: buffer_gl0_inv 2041; GFX10-CU-NEXT: s_endpgm 2042; 2043; SKIP-CACHE-INV-LABEL: global_system_acq_rel_atomicrmw: 2044; SKIP-CACHE-INV: ; %bb.0: ; %entry 2045; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2046; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2047; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2048; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2049; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2050; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2051; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2052; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2053; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 2054; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2055; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2056; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 2057; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2058; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 2059; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2060; SKIP-CACHE-INV-NEXT: s_endpgm 2061; 2062; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_atomicrmw: 2063; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2064; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2065; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2066; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2067; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2068; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2069; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 2070; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) 2071; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 2072; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2073; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2074; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2075; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2076; 2077; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_atomicrmw: 2078; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2079; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2080; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2081; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2082; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2083; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2084; GFX90A-TGSPLIT-NEXT: buffer_wbl2 2085; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2086; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 2087; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2088; GFX90A-TGSPLIT-NEXT: buffer_invl2 2089; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2090; GFX90A-TGSPLIT-NEXT: s_endpgm 2091; 2092; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_atomicrmw: 2093; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2094; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2095; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2096; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2097; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2098; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2099; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 2100; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2101; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 2102; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2103; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 2104; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2105; 2106; GFX940-TGSPLIT-LABEL: global_system_acq_rel_atomicrmw: 2107; GFX940-TGSPLIT: ; %bb.0: ; %entry 2108; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2109; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2110; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2111; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2112; 
GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2113; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 2114; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2115; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 2116; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2117; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 2118; GFX940-TGSPLIT-NEXT: s_endpgm 2119; 2120; GFX11-WGP-LABEL: global_system_acq_rel_atomicrmw: 2121; GFX11-WGP: ; %bb.0: ; %entry 2122; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2123; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2124; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2125; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2126; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2127; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2128; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2129; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 2130; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2131; GFX11-WGP-NEXT: buffer_gl1_inv 2132; GFX11-WGP-NEXT: buffer_gl0_inv 2133; GFX11-WGP-NEXT: s_endpgm 2134; 2135; GFX11-CU-LABEL: global_system_acq_rel_atomicrmw: 2136; GFX11-CU: ; %bb.0: ; %entry 2137; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2138; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2139; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2140; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2141; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 2142; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2143; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2144; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 2145; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2146; GFX11-CU-NEXT: buffer_gl1_inv 2147; GFX11-CU-NEXT: buffer_gl0_inv 2148; GFX11-CU-NEXT: s_endpgm 2149; 2150; GFX12-WGP-LABEL: global_system_acq_rel_atomicrmw: 2151; GFX12-WGP: ; %bb.0: ; %entry 2152; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2153; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2154; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2155; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2156; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 2157; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 2158; GFX12-WGP-NEXT: 
s_wait_bvhcnt 0x0 2159; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2160; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2161; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 2162; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 2163; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2164; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 2165; GFX12-WGP-NEXT: s_endpgm 2166; 2167; GFX12-CU-LABEL: global_system_acq_rel_atomicrmw: 2168; GFX12-CU: ; %bb.0: ; %entry 2169; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2170; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2171; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2172; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2173; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 2174; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 2175; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2176; GFX12-CU-NEXT: s_wait_samplecnt 0x0 2177; GFX12-CU-NEXT: s_wait_storecnt 0x0 2178; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 2179; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 2180; GFX12-CU-NEXT: s_wait_storecnt 0x0 2181; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 2182; GFX12-CU-NEXT: s_endpgm 2183 ptr addrspace(1) %out, i32 %in) { 2184entry: 2185 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acq_rel 2186 ret void 2187} 2188 2189define amdgpu_kernel void @global_system_seq_cst_atomicrmw( 2190; GFX6-LABEL: global_system_seq_cst_atomicrmw: 2191; GFX6: ; %bb.0: ; %entry 2192; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2193; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 2194; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2195; GFX6-NEXT: s_mov_b32 s11, s5 2196; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2197; GFX6-NEXT: s_mov_b32 s9, 0x100f000 2198; GFX6-NEXT: s_mov_b32 s10, -1 2199; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 2200; GFX6-NEXT: s_mov_b32 s5, s11 2201; GFX6-NEXT: s_mov_b32 s6, s10 2202; GFX6-NEXT: s_mov_b32 s7, s9 2203; GFX6-NEXT: v_mov_b32_e32 v0, s8 2204; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2205; GFX6-NEXT: buffer_atomic_swap v0, off, 
s[4:7], 0 2206; GFX6-NEXT: s_waitcnt vmcnt(0) 2207; GFX6-NEXT: buffer_wbinvl1 2208; GFX6-NEXT: s_endpgm 2209; 2210; GFX7-LABEL: global_system_seq_cst_atomicrmw: 2211; GFX7: ; %bb.0: ; %entry 2212; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 2213; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 2214; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2215; GFX7-NEXT: v_mov_b32_e32 v0, s6 2216; GFX7-NEXT: v_mov_b32_e32 v1, s7 2217; GFX7-NEXT: v_mov_b32_e32 v2, s4 2218; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2219; GFX7-NEXT: flat_atomic_swap v[0:1], v2 2220; GFX7-NEXT: s_waitcnt vmcnt(0) 2221; GFX7-NEXT: buffer_wbinvl1_vol 2222; GFX7-NEXT: s_endpgm 2223; 2224; GFX10-WGP-LABEL: global_system_seq_cst_atomicrmw: 2225; GFX10-WGP: ; %bb.0: ; %entry 2226; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2227; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2228; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2229; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2230; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 2231; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2232; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2233; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 2234; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2235; GFX10-WGP-NEXT: buffer_gl1_inv 2236; GFX10-WGP-NEXT: buffer_gl0_inv 2237; GFX10-WGP-NEXT: s_endpgm 2238; 2239; GFX10-CU-LABEL: global_system_seq_cst_atomicrmw: 2240; GFX10-CU: ; %bb.0: ; %entry 2241; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2242; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2243; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2244; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2245; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2246; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2247; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2248; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 2249; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2250; GFX10-CU-NEXT: buffer_gl1_inv 2251; GFX10-CU-NEXT: buffer_gl0_inv 2252; GFX10-CU-NEXT: s_endpgm 2253; 2254; SKIP-CACHE-INV-LABEL: global_system_seq_cst_atomicrmw: 2255; SKIP-CACHE-INV: ; %bb.0: 
; %entry 2256; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2257; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2258; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2259; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2260; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2261; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2262; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2263; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2264; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 2265; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2266; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2267; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 2268; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2269; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 2270; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2271; SKIP-CACHE-INV-NEXT: s_endpgm 2272; 2273; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_atomicrmw: 2274; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2275; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2276; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2277; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2278; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2279; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2280; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 2281; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2282; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 2283; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2284; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2285; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2286; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2287; 2288; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_atomicrmw: 2289; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2290; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2291; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2292; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2293; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2294; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2295; GFX90A-TGSPLIT-NEXT: 
buffer_wbl2 2296; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2297; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 2298; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2299; GFX90A-TGSPLIT-NEXT: buffer_invl2 2300; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2301; GFX90A-TGSPLIT-NEXT: s_endpgm 2302; 2303; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_atomicrmw: 2304; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2305; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2306; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2307; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2308; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2309; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2310; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 2311; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2312; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 2313; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2314; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 2315; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2316; 2317; GFX940-TGSPLIT-LABEL: global_system_seq_cst_atomicrmw: 2318; GFX940-TGSPLIT: ; %bb.0: ; %entry 2319; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2320; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2321; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2322; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2323; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2324; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 2325; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2326; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 2327; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2328; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 2329; GFX940-TGSPLIT-NEXT: s_endpgm 2330; 2331; GFX11-WGP-LABEL: global_system_seq_cst_atomicrmw: 2332; GFX11-WGP: ; %bb.0: ; %entry 2333; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2334; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2335; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2336; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2337; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2338; 
GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2339; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2340; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 2341; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2342; GFX11-WGP-NEXT: buffer_gl1_inv 2343; GFX11-WGP-NEXT: buffer_gl0_inv 2344; GFX11-WGP-NEXT: s_endpgm 2345; 2346; GFX11-CU-LABEL: global_system_seq_cst_atomicrmw: 2347; GFX11-CU: ; %bb.0: ; %entry 2348; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2349; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2350; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2351; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2352; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 2353; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2354; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2355; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 2356; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2357; GFX11-CU-NEXT: buffer_gl1_inv 2358; GFX11-CU-NEXT: buffer_gl0_inv 2359; GFX11-CU-NEXT: s_endpgm 2360; 2361; GFX12-WGP-LABEL: global_system_seq_cst_atomicrmw: 2362; GFX12-WGP: ; %bb.0: ; %entry 2363; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2364; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2365; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2366; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2367; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 2368; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 2369; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 2370; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2371; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2372; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 2373; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 2374; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2375; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 2376; GFX12-WGP-NEXT: s_endpgm 2377; 2378; GFX12-CU-LABEL: global_system_seq_cst_atomicrmw: 2379; GFX12-CU: ; %bb.0: ; %entry 2380; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2381; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2382; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2383; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2384; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 2385; 
GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 2386; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2387; GFX12-CU-NEXT: s_wait_samplecnt 0x0 2388; GFX12-CU-NEXT: s_wait_storecnt 0x0 2389; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 2390; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 2391; GFX12-CU-NEXT: s_wait_storecnt 0x0 2392; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 2393; GFX12-CU-NEXT: s_endpgm 2394 ptr addrspace(1) %out, i32 %in) { 2395entry: 2396 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst 2397 ret void 2398} 2399 2400define amdgpu_kernel void @global_system_acquire_ret_atomicrmw( 2401; GFX6-LABEL: global_system_acquire_ret_atomicrmw: 2402; GFX6: ; %bb.0: ; %entry 2403; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2404; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 2405; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2406; GFX6-NEXT: s_mov_b32 s11, s5 2407; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2408; GFX6-NEXT: s_mov_b32 s9, 0x100f000 2409; GFX6-NEXT: s_mov_b32 s10, -1 2410; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 2411; GFX6-NEXT: s_mov_b32 s5, s11 2412; GFX6-NEXT: s_mov_b32 s6, s10 2413; GFX6-NEXT: s_mov_b32 s7, s9 2414; GFX6-NEXT: v_mov_b32_e32 v0, s8 2415; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 2416; GFX6-NEXT: s_waitcnt vmcnt(0) 2417; GFX6-NEXT: buffer_wbinvl1 2418; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2419; GFX6-NEXT: s_endpgm 2420; 2421; GFX7-LABEL: global_system_acquire_ret_atomicrmw: 2422; GFX7: ; %bb.0: ; %entry 2423; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2424; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 2425; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2426; GFX7-NEXT: v_mov_b32_e32 v0, s4 2427; GFX7-NEXT: v_mov_b32_e32 v1, s5 2428; GFX7-NEXT: v_mov_b32_e32 v2, s6 2429; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2430; GFX7-NEXT: s_waitcnt vmcnt(0) 2431; GFX7-NEXT: buffer_wbinvl1_vol 2432; GFX7-NEXT: v_mov_b32_e32 v0, s4 2433; GFX7-NEXT: v_mov_b32_e32 v1, s5 2434; 
GFX7-NEXT: flat_store_dword v[0:1], v2 2435; GFX7-NEXT: s_endpgm 2436; 2437; GFX10-WGP-LABEL: global_system_acquire_ret_atomicrmw: 2438; GFX10-WGP: ; %bb.0: ; %entry 2439; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2440; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2441; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2442; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2443; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 2444; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2445; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2446; GFX10-WGP-NEXT: buffer_gl1_inv 2447; GFX10-WGP-NEXT: buffer_gl0_inv 2448; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 2449; GFX10-WGP-NEXT: s_endpgm 2450; 2451; GFX10-CU-LABEL: global_system_acquire_ret_atomicrmw: 2452; GFX10-CU: ; %bb.0: ; %entry 2453; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2454; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2455; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2456; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2457; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2458; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2459; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2460; GFX10-CU-NEXT: buffer_gl1_inv 2461; GFX10-CU-NEXT: buffer_gl0_inv 2462; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 2463; GFX10-CU-NEXT: s_endpgm 2464; 2465; SKIP-CACHE-INV-LABEL: global_system_acquire_ret_atomicrmw: 2466; SKIP-CACHE-INV: ; %bb.0: ; %entry 2467; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2468; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2469; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2470; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2471; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2472; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2473; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2474; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2475; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 2476; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2477; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2478; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s4 2479; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 2480; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2481; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 2482; SKIP-CACHE-INV-NEXT: s_endpgm 2483; 2484; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_ret_atomicrmw: 2485; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2486; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2487; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2488; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2489; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2490; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2491; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2492; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2493; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2494; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2495; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2496; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2497; 2498; GFX90A-TGSPLIT-LABEL: global_system_acquire_ret_atomicrmw: 2499; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2500; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2501; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2502; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2503; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2504; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2505; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2506; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2507; GFX90A-TGSPLIT-NEXT: buffer_invl2 2508; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2509; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2510; GFX90A-TGSPLIT-NEXT: s_endpgm 2511; 2512; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_ret_atomicrmw: 2513; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2514; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2515; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2516; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2517; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2518; GFX940-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v1, s2 2519; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 2520; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2521; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 2522; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2523; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2524; 2525; GFX940-TGSPLIT-LABEL: global_system_acquire_ret_atomicrmw: 2526; GFX940-TGSPLIT: ; %bb.0: ; %entry 2527; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2528; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2529; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2530; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2531; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2532; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 2533; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2534; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 2535; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2536; GFX940-TGSPLIT-NEXT: s_endpgm 2537; 2538; GFX11-WGP-LABEL: global_system_acquire_ret_atomicrmw: 2539; GFX11-WGP: ; %bb.0: ; %entry 2540; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2541; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2542; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2543; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2544; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2545; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2546; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 2547; GFX11-WGP-NEXT: buffer_gl1_inv 2548; GFX11-WGP-NEXT: buffer_gl0_inv 2549; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2550; GFX11-WGP-NEXT: s_endpgm 2551; 2552; GFX11-CU-LABEL: global_system_acquire_ret_atomicrmw: 2553; GFX11-CU: ; %bb.0: ; %entry 2554; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2555; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2556; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2557; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2558; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 2559; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2560; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 2561; GFX11-CU-NEXT: 
buffer_gl1_inv 2562; GFX11-CU-NEXT: buffer_gl0_inv 2563; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2564; GFX11-CU-NEXT: s_endpgm 2565; 2566; GFX12-WGP-LABEL: global_system_acquire_ret_atomicrmw: 2567; GFX12-WGP: ; %bb.0: ; %entry 2568; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2569; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2570; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2571; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2572; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 2573; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS 2574; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 2575; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 2576; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2577; GFX12-WGP-NEXT: s_endpgm 2578; 2579; GFX12-CU-LABEL: global_system_acquire_ret_atomicrmw: 2580; GFX12-CU: ; %bb.0: ; %entry 2581; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2582; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2583; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2584; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2585; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 2586; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS 2587; GFX12-CU-NEXT: s_wait_loadcnt 0x0 2588; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 2589; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2590; GFX12-CU-NEXT: s_endpgm 2591 ptr addrspace(1) %out, i32 %in) { 2592entry: 2593 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acquire ; acquire exchange with no syncscope, so the default system-wide scope applies; the old value in %val is then stored back through %out 2594 store i32 %val, ptr addrspace(1) %out, align 4 2595 ret void 2596} 2597 2598define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; acq_rel variant of the returning exchange test: the body performs the xchg and stores the old value back through %out 2599; GFX6-LABEL: global_system_acq_rel_ret_atomicrmw: 2600; GFX6: ; %bb.0: ; %entry 2601; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2602; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 2603; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2604; GFX6-NEXT: s_mov_b32 s11, s5 2605; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2606; GFX6-NEXT: s_mov_b32 s9, 0x100f000 2607; GFX6-NEXT: s_mov_b32 s10, -1
2608; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 2609; GFX6-NEXT: s_mov_b32 s5, s11 2610; GFX6-NEXT: s_mov_b32 s6, s10 2611; GFX6-NEXT: s_mov_b32 s7, s9 2612; GFX6-NEXT: v_mov_b32_e32 v0, s8 2613; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2614; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 2615; GFX6-NEXT: s_waitcnt vmcnt(0) 2616; GFX6-NEXT: buffer_wbinvl1 2617; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2618; GFX6-NEXT: s_endpgm 2619; 2620; GFX7-LABEL: global_system_acq_rel_ret_atomicrmw: 2621; GFX7: ; %bb.0: ; %entry 2622; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2623; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 2624; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2625; GFX7-NEXT: v_mov_b32_e32 v0, s4 2626; GFX7-NEXT: v_mov_b32_e32 v1, s5 2627; GFX7-NEXT: v_mov_b32_e32 v2, s6 2628; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2629; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2630; GFX7-NEXT: s_waitcnt vmcnt(0) 2631; GFX7-NEXT: buffer_wbinvl1_vol 2632; GFX7-NEXT: v_mov_b32_e32 v0, s4 2633; GFX7-NEXT: v_mov_b32_e32 v1, s5 2634; GFX7-NEXT: flat_store_dword v[0:1], v2 2635; GFX7-NEXT: s_endpgm 2636; 2637; GFX10-WGP-LABEL: global_system_acq_rel_ret_atomicrmw: 2638; GFX10-WGP: ; %bb.0: ; %entry 2639; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2640; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2641; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2642; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2643; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 2644; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2645; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2646; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2647; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2648; GFX10-WGP-NEXT: buffer_gl1_inv 2649; GFX10-WGP-NEXT: buffer_gl0_inv 2650; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 2651; GFX10-WGP-NEXT: s_endpgm 2652; 2653; GFX10-CU-LABEL: global_system_acq_rel_ret_atomicrmw: 2654; GFX10-CU: ; %bb.0: ; %entry 2655; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2656; GFX10-CU-NEXT: 
s_load_dwordx2 s[4:5], s[8:9], 0x0 2657; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2658; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2659; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2660; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2661; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2662; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2663; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2664; GFX10-CU-NEXT: buffer_gl1_inv 2665; GFX10-CU-NEXT: buffer_gl0_inv 2666; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 2667; GFX10-CU-NEXT: s_endpgm 2668; 2669; SKIP-CACHE-INV-LABEL: global_system_acq_rel_ret_atomicrmw: 2670; SKIP-CACHE-INV: ; %bb.0: ; %entry 2671; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2672; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2673; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2674; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2675; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2676; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2677; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2678; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2679; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 2680; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2681; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2682; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 2683; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2684; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 2685; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2686; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 2687; SKIP-CACHE-INV-NEXT: s_endpgm 2688; 2689; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_ret_atomicrmw: 2690; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2691; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2692; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2693; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2694; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2695; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2696; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 
2697; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2698; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2699; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2700; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2701; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2702; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2703; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2704; 2705; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_ret_atomicrmw: 2706; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2707; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2708; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2709; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2710; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2711; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2712; GFX90A-TGSPLIT-NEXT: buffer_wbl2 2713; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2714; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2715; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2716; GFX90A-TGSPLIT-NEXT: buffer_invl2 2717; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2718; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2719; GFX90A-TGSPLIT-NEXT: s_endpgm 2720; 2721; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_ret_atomicrmw: 2722; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2723; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2724; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2725; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2726; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2727; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2728; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 2729; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2730; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 2731; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2732; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 2733; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2734; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2735; 2736; GFX940-TGSPLIT-LABEL: 
global_system_acq_rel_ret_atomicrmw: 2737; GFX940-TGSPLIT: ; %bb.0: ; %entry 2738; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2739; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2740; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2741; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2742; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2743; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 2744; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2745; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 2746; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2747; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 2748; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2749; GFX940-TGSPLIT-NEXT: s_endpgm 2750; 2751; GFX11-WGP-LABEL: global_system_acq_rel_ret_atomicrmw: 2752; GFX11-WGP: ; %bb.0: ; %entry 2753; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2754; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2755; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2756; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2757; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2758; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2759; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2760; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2761; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 2762; GFX11-WGP-NEXT: buffer_gl1_inv 2763; GFX11-WGP-NEXT: buffer_gl0_inv 2764; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2765; GFX11-WGP-NEXT: s_endpgm 2766; 2767; GFX11-CU-LABEL: global_system_acq_rel_ret_atomicrmw: 2768; GFX11-CU: ; %bb.0: ; %entry 2769; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2770; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2771; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2772; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2773; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 2774; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2775; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2776; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2777; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 2778; GFX11-CU-NEXT: buffer_gl1_inv 2779; GFX11-CU-NEXT: 
buffer_gl0_inv 2780; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2781; GFX11-CU-NEXT: s_endpgm 2782; 2783; GFX12-WGP-LABEL: global_system_acq_rel_ret_atomicrmw: 2784; GFX12-WGP: ; %bb.0: ; %entry 2785; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2786; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2787; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2788; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2789; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 2790; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 2791; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 2792; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2793; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2794; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 2795; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS 2796; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 2797; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2798; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 2799; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 2800; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2801; GFX12-WGP-NEXT: s_endpgm 2802; 2803; GFX12-CU-LABEL: global_system_acq_rel_ret_atomicrmw: 2804; GFX12-CU: ; %bb.0: ; %entry 2805; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2806; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2807; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2808; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2809; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 2810; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 2811; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2812; GFX12-CU-NEXT: s_wait_samplecnt 0x0 2813; GFX12-CU-NEXT: s_wait_storecnt 0x0 2814; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 2815; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS 2816; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2817; GFX12-CU-NEXT: s_wait_samplecnt 0x0 2818; GFX12-CU-NEXT: s_wait_loadcnt 0x0 2819; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 2820; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2821; GFX12-CU-NEXT: s_endpgm 2822 ptr addrspace(1) %out, i32 %in) { 2823entry: 2824 %val = atomicrmw volatile xchg ptr addrspace(1) %out, 
i32 %in acq_rel 2825 store i32 %val, ptr addrspace(1) %out, align 4 2826 ret void 2827} 2828 2829define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( 2830; GFX6-LABEL: global_system_seq_cst_ret_atomicrmw: 2831; GFX6: ; %bb.0: ; %entry 2832; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2833; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 2834; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2835; GFX6-NEXT: s_mov_b32 s11, s5 2836; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2837; GFX6-NEXT: s_mov_b32 s9, 0x100f000 2838; GFX6-NEXT: s_mov_b32 s10, -1 2839; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 2840; GFX6-NEXT: s_mov_b32 s5, s11 2841; GFX6-NEXT: s_mov_b32 s6, s10 2842; GFX6-NEXT: s_mov_b32 s7, s9 2843; GFX6-NEXT: v_mov_b32_e32 v0, s8 2844; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2845; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 2846; GFX6-NEXT: s_waitcnt vmcnt(0) 2847; GFX6-NEXT: buffer_wbinvl1 2848; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2849; GFX6-NEXT: s_endpgm 2850; 2851; GFX7-LABEL: global_system_seq_cst_ret_atomicrmw: 2852; GFX7: ; %bb.0: ; %entry 2853; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2854; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 2855; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2856; GFX7-NEXT: v_mov_b32_e32 v0, s4 2857; GFX7-NEXT: v_mov_b32_e32 v1, s5 2858; GFX7-NEXT: v_mov_b32_e32 v2, s6 2859; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2860; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2861; GFX7-NEXT: s_waitcnt vmcnt(0) 2862; GFX7-NEXT: buffer_wbinvl1_vol 2863; GFX7-NEXT: v_mov_b32_e32 v0, s4 2864; GFX7-NEXT: v_mov_b32_e32 v1, s5 2865; GFX7-NEXT: flat_store_dword v[0:1], v2 2866; GFX7-NEXT: s_endpgm 2867; 2868; GFX10-WGP-LABEL: global_system_seq_cst_ret_atomicrmw: 2869; GFX10-WGP: ; %bb.0: ; %entry 2870; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2871; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2872; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2873; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2874; 
GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 2875; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2876; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2877; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2878; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2879; GFX10-WGP-NEXT: buffer_gl1_inv 2880; GFX10-WGP-NEXT: buffer_gl0_inv 2881; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 2882; GFX10-WGP-NEXT: s_endpgm 2883; 2884; GFX10-CU-LABEL: global_system_seq_cst_ret_atomicrmw: 2885; GFX10-CU: ; %bb.0: ; %entry 2886; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2887; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2888; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2889; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2890; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2891; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2892; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2893; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2894; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2895; GFX10-CU-NEXT: buffer_gl1_inv 2896; GFX10-CU-NEXT: buffer_gl0_inv 2897; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 2898; GFX10-CU-NEXT: s_endpgm 2899; 2900; SKIP-CACHE-INV-LABEL: global_system_seq_cst_ret_atomicrmw: 2901; SKIP-CACHE-INV: ; %bb.0: ; %entry 2902; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2903; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2904; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2905; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2906; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2907; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2908; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2909; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2910; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 2911; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2912; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2913; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 2914; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2915; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 2916; 
SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2917; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 2918; SKIP-CACHE-INV-NEXT: s_endpgm 2919; 2920; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw: 2921; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2922; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2923; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2924; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2925; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2926; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2927; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 2928; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2929; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2930; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2931; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2932; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2933; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2934; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2935; 2936; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw: 2937; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2938; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2939; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2940; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2941; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2942; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2943; GFX90A-TGSPLIT-NEXT: buffer_wbl2 2944; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2945; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2946; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2947; GFX90A-TGSPLIT-NEXT: buffer_invl2 2948; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2949; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2950; GFX90A-TGSPLIT-NEXT: s_endpgm 2951; 2952; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw: 2953; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2954; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2955; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2956; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, 
s[4:5], 0x8 2957; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2958; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2959; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 2960; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2961; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 2962; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2963; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 2964; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2965; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2966; 2967; GFX940-TGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw: 2968; GFX940-TGSPLIT: ; %bb.0: ; %entry 2969; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2970; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2971; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2972; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2973; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2974; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 2975; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2976; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 2977; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2978; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 2979; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2980; GFX940-TGSPLIT-NEXT: s_endpgm 2981; 2982; GFX11-WGP-LABEL: global_system_seq_cst_ret_atomicrmw: 2983; GFX11-WGP: ; %bb.0: ; %entry 2984; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2985; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2986; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2987; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2988; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2989; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2990; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2991; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2992; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 2993; GFX11-WGP-NEXT: buffer_gl1_inv 2994; GFX11-WGP-NEXT: buffer_gl0_inv 2995; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2996; GFX11-WGP-NEXT: s_endpgm 2997; 2998; GFX11-CU-LABEL: 
global_system_seq_cst_ret_atomicrmw: 2999; GFX11-CU: ; %bb.0: ; %entry 3000; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3001; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3002; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 3003; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3004; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 3005; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3006; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 3007; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 3008; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 3009; GFX11-CU-NEXT: buffer_gl1_inv 3010; GFX11-CU-NEXT: buffer_gl0_inv 3011; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 3012; GFX11-CU-NEXT: s_endpgm 3013; 3014; GFX12-WGP-LABEL: global_system_seq_cst_ret_atomicrmw: 3015; GFX12-WGP: ; %bb.0: ; %entry 3016; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3017; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3018; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 3019; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3020; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 3021; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 3022; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 3023; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 3024; GFX12-WGP-NEXT: s_wait_storecnt 0x0 3025; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 3026; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS 3027; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 3028; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 3029; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 3030; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 3031; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 3032; GFX12-WGP-NEXT: s_endpgm 3033; 3034; GFX12-CU-LABEL: global_system_seq_cst_ret_atomicrmw: 3035; GFX12-CU: ; %bb.0: ; %entry 3036; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 3037; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3038; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 3039; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3040; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 3041; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 3042; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 3043; GFX12-CU-NEXT: 
s_wait_samplecnt 0x0 3044; GFX12-CU-NEXT: s_wait_storecnt 0x0 3045; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 3046; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS 3047; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 3048; GFX12-CU-NEXT: s_wait_samplecnt 0x0 3049; GFX12-CU-NEXT: s_wait_loadcnt 0x0 3050; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 3051; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 3052; GFX12-CU-NEXT: s_endpgm 3053 ptr addrspace(1) %out, i32 %in) { 3054entry: 3055 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst 3056 store i32 %val, ptr addrspace(1) %out, align 4 3057 ret void 3058} 3059 3060define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg( 3061; GFX6-LABEL: global_system_monotonic_monotonic_cmpxchg: 3062; GFX6: ; %bb.0: ; %entry 3063; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3064; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3065; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3066; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3067; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3068; GFX6-NEXT: s_mov_b32 s12, s5 3069; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 3070; GFX6-NEXT: s_mov_b32 s10, 0x100f000 3071; GFX6-NEXT: s_mov_b32 s11, -1 3072; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 3073; GFX6-NEXT: s_mov_b32 s5, s12 3074; GFX6-NEXT: s_mov_b32 s6, s11 3075; GFX6-NEXT: s_mov_b32 s7, s10 3076; GFX6-NEXT: v_mov_b32_e32 v0, s9 3077; GFX6-NEXT: v_mov_b32_e32 v2, s8 3078; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3079; GFX6-NEXT: v_mov_b32_e32 v1, v2 3080; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 3081; GFX6-NEXT: s_endpgm 3082; 3083; GFX7-LABEL: global_system_monotonic_monotonic_cmpxchg: 3084; GFX7: ; %bb.0: ; %entry 3085; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3086; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3087; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3088; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3089; 
GFX7-NEXT: s_mov_b64 s[10:11], 16 3090; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3091; GFX7-NEXT: s_mov_b32 s4, s8 3092; GFX7-NEXT: s_mov_b32 s5, s9 3093; GFX7-NEXT: s_mov_b32 s9, s10 3094; GFX7-NEXT: s_mov_b32 s8, s11 3095; GFX7-NEXT: s_add_u32 s4, s4, s9 3096; GFX7-NEXT: s_addc_u32 s8, s5, s8 3097; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3098; GFX7-NEXT: s_mov_b32 s5, s8 3099; GFX7-NEXT: v_mov_b32_e32 v2, s7 3100; GFX7-NEXT: v_mov_b32_e32 v0, s6 3101; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3102; GFX7-NEXT: v_mov_b32_e32 v3, v0 3103; GFX7-NEXT: v_mov_b32_e32 v0, s4 3104; GFX7-NEXT: v_mov_b32_e32 v1, s5 3105; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3106; GFX7-NEXT: s_endpgm 3107; 3108; GFX10-WGP-LABEL: global_system_monotonic_monotonic_cmpxchg: 3109; GFX10-WGP: ; %bb.0: ; %entry 3110; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3111; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3112; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 3113; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 3114; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3115; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 3116; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 3117; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3118; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 3119; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3120; GFX10-WGP-NEXT: s_endpgm 3121; 3122; GFX10-CU-LABEL: global_system_monotonic_monotonic_cmpxchg: 3123; GFX10-CU: ; %bb.0: ; %entry 3124; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3125; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3126; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 3127; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 3128; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3129; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 3130; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 3131; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3132; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 3133; GFX10-CU-NEXT: 
global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3134; GFX10-CU-NEXT: s_endpgm 3135; 3136; SKIP-CACHE-INV-LABEL: global_system_monotonic_monotonic_cmpxchg: 3137; SKIP-CACHE-INV: ; %bb.0: ; %entry 3138; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 3139; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 3140; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 3141; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 3142; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3143; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 3144; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 3145; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 3146; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 3147; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 3148; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 3149; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 3150; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 3151; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 3152; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 3153; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3154; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 3155; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 3156; SKIP-CACHE-INV-NEXT: s_endpgm 3157; 3158; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg: 3159; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3160; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3161; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3162; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3163; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3164; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3165; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3166; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3167; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3168; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3169; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 
3170; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3171; 3172; GFX90A-TGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg: 3173; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3174; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3175; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3176; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3177; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3178; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3179; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3180; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3181; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3182; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3183; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3184; GFX90A-TGSPLIT-NEXT: s_endpgm 3185; 3186; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg: 3187; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3188; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3189; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3190; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3191; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3192; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3193; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3194; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3195; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3196; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3197; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 3198; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3199; 3200; GFX940-TGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg: 3201; GFX940-TGSPLIT: ; %bb.0: ; %entry 3202; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3203; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3204; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3205; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3206; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3207; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3208; 
GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3209; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3210; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3211; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 3212; GFX940-TGSPLIT-NEXT: s_endpgm 3213; 3214; GFX11-WGP-LABEL: global_system_monotonic_monotonic_cmpxchg: 3215; GFX11-WGP: ; %bb.0: ; %entry 3216; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 3217; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3218; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3219; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3220; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3221; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 3222; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 3223; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3224; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 3225; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3226; GFX11-WGP-NEXT: s_endpgm 3227; 3228; GFX11-CU-LABEL: global_system_monotonic_monotonic_cmpxchg: 3229; GFX11-CU: ; %bb.0: ; %entry 3230; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3231; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3232; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3233; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3234; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3235; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 3236; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 3237; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3238; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 3239; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3240; GFX11-CU-NEXT: s_endpgm 3241; 3242; GFX12-WGP-LABEL: global_system_monotonic_monotonic_cmpxchg: 3243; GFX12-WGP: ; %bb.0: ; %entry 3244; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3245; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3246; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3247; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3248; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3249; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 
3250; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 3251; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3252; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 3253; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 3254; GFX12-WGP-NEXT: s_endpgm 3255; 3256; GFX12-CU-LABEL: global_system_monotonic_monotonic_cmpxchg: 3257; GFX12-CU: ; %bb.0: ; %entry 3258; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 3259; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3260; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3261; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3262; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3263; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 3264; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 3265; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3266; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 3267; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 3268; GFX12-CU-NEXT: s_endpgm 3269 ptr addrspace(1) %out, i32 %in, i32 %old) { 3270entry: 3271 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 3272 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic monotonic 3273 ret void 3274} 3275 3276define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg( 3277; GFX6-LABEL: global_system_acquire_monotonic_cmpxchg: 3278; GFX6: ; %bb.0: ; %entry 3279; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3280; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3281; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3282; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3283; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3284; GFX6-NEXT: s_mov_b32 s12, s5 3285; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 3286; GFX6-NEXT: s_mov_b32 s10, 0x100f000 3287; GFX6-NEXT: s_mov_b32 s11, -1 3288; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 3289; GFX6-NEXT: s_mov_b32 s5, s12 3290; GFX6-NEXT: s_mov_b32 s6, s11 3291; GFX6-NEXT: s_mov_b32 s7, s10 3292; GFX6-NEXT: v_mov_b32_e32 v0, s9 3293; GFX6-NEXT: 
v_mov_b32_e32 v2, s8 3294; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3295; GFX6-NEXT: v_mov_b32_e32 v1, v2 3296; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 3297; GFX6-NEXT: s_waitcnt vmcnt(0) 3298; GFX6-NEXT: buffer_wbinvl1 3299; GFX6-NEXT: s_endpgm 3300; 3301; GFX7-LABEL: global_system_acquire_monotonic_cmpxchg: 3302; GFX7: ; %bb.0: ; %entry 3303; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3304; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3305; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3306; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3307; GFX7-NEXT: s_mov_b64 s[10:11], 16 3308; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3309; GFX7-NEXT: s_mov_b32 s4, s8 3310; GFX7-NEXT: s_mov_b32 s5, s9 3311; GFX7-NEXT: s_mov_b32 s9, s10 3312; GFX7-NEXT: s_mov_b32 s8, s11 3313; GFX7-NEXT: s_add_u32 s4, s4, s9 3314; GFX7-NEXT: s_addc_u32 s8, s5, s8 3315; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3316; GFX7-NEXT: s_mov_b32 s5, s8 3317; GFX7-NEXT: v_mov_b32_e32 v2, s7 3318; GFX7-NEXT: v_mov_b32_e32 v0, s6 3319; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3320; GFX7-NEXT: v_mov_b32_e32 v3, v0 3321; GFX7-NEXT: v_mov_b32_e32 v0, s4 3322; GFX7-NEXT: v_mov_b32_e32 v1, s5 3323; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3324; GFX7-NEXT: s_waitcnt vmcnt(0) 3325; GFX7-NEXT: buffer_wbinvl1_vol 3326; GFX7-NEXT: s_endpgm 3327; 3328; GFX10-WGP-LABEL: global_system_acquire_monotonic_cmpxchg: 3329; GFX10-WGP: ; %bb.0: ; %entry 3330; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3331; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3332; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 3333; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 3334; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3335; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 3336; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 3337; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3338; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 3339; GFX10-WGP-NEXT: 
global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3340; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3341; GFX10-WGP-NEXT: buffer_gl1_inv 3342; GFX10-WGP-NEXT: buffer_gl0_inv 3343; GFX10-WGP-NEXT: s_endpgm 3344; 3345; GFX10-CU-LABEL: global_system_acquire_monotonic_cmpxchg: 3346; GFX10-CU: ; %bb.0: ; %entry 3347; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3348; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3349; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 3350; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 3351; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3352; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 3353; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 3354; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3355; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 3356; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3357; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3358; GFX10-CU-NEXT: buffer_gl1_inv 3359; GFX10-CU-NEXT: buffer_gl0_inv 3360; GFX10-CU-NEXT: s_endpgm 3361; 3362; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_cmpxchg: 3363; SKIP-CACHE-INV: ; %bb.0: ; %entry 3364; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 3365; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 3366; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 3367; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 3368; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3369; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 3370; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 3371; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 3372; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 3373; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 3374; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 3375; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 3376; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 3377; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 3378; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 3379; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3380; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 3381; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 3382; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3383; SKIP-CACHE-INV-NEXT: s_endpgm 3384; 3385; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg: 3386; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3387; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3388; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3389; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3390; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3391; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3392; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3393; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3394; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3395; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3396; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3397; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3398; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 3399; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3400; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3401; 3402; GFX90A-TGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg: 3403; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3404; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3405; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3406; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3407; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3408; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3409; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3410; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3411; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3412; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3413; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3414; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3415; GFX90A-TGSPLIT-NEXT: buffer_invl2 3416; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3417; GFX90A-TGSPLIT-NEXT: s_endpgm 3418; 3419; 
GFX940-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg: 3420; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3421; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3422; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3423; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3424; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3425; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3426; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3427; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3428; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3429; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3430; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 3431; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3432; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 3433; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3434; 3435; GFX940-TGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg: 3436; GFX940-TGSPLIT: ; %bb.0: ; %entry 3437; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3438; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3439; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3440; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3441; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3442; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3443; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3444; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3445; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3446; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 3447; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3448; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 3449; GFX940-TGSPLIT-NEXT: s_endpgm 3450; 3451; GFX11-WGP-LABEL: global_system_acquire_monotonic_cmpxchg: 3452; GFX11-WGP: ; %bb.0: ; %entry 3453; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 3454; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3455; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3456; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3457; 
GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3458; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 3459; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 3460; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3461; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 3462; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3463; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3464; GFX11-WGP-NEXT: buffer_gl1_inv 3465; GFX11-WGP-NEXT: buffer_gl0_inv 3466; GFX11-WGP-NEXT: s_endpgm 3467; 3468; GFX11-CU-LABEL: global_system_acquire_monotonic_cmpxchg: 3469; GFX11-CU: ; %bb.0: ; %entry 3470; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3471; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3472; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3473; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3474; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3475; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 3476; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 3477; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3478; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 3479; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3480; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 3481; GFX11-CU-NEXT: buffer_gl1_inv 3482; GFX11-CU-NEXT: buffer_gl0_inv 3483; GFX11-CU-NEXT: s_endpgm 3484; 3485; GFX12-WGP-LABEL: global_system_acquire_monotonic_cmpxchg: 3486; GFX12-WGP: ; %bb.0: ; %entry 3487; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3488; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3489; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3490; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3491; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3492; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 3493; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 3494; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3495; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 3496; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 3497; GFX12-WGP-NEXT: s_wait_storecnt 0x0 3498; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 3499; 
GFX12-WGP-NEXT: s_endpgm 3500; 3501; GFX12-CU-LABEL: global_system_acquire_monotonic_cmpxchg: 3502; GFX12-CU: ; %bb.0: ; %entry 3503; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 3504; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3505; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3506; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3507; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3508; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 3509; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 3510; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3511; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 3512; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 3513; GFX12-CU-NEXT: s_wait_storecnt 0x0 3514; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 3515; GFX12-CU-NEXT: s_endpgm 3516 ptr addrspace(1) %out, i32 %in, i32 %old) { 3517entry: 3518 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 3519 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire monotonic 3520 ret void 3521} 3522 3523define amdgpu_kernel void @global_system_release_monotonic_cmpxchg( 3524; GFX6-LABEL: global_system_release_monotonic_cmpxchg: 3525; GFX6: ; %bb.0: ; %entry 3526; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3527; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3528; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3529; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3530; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3531; GFX6-NEXT: s_mov_b32 s12, s5 3532; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 3533; GFX6-NEXT: s_mov_b32 s10, 0x100f000 3534; GFX6-NEXT: s_mov_b32 s11, -1 3535; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 3536; GFX6-NEXT: s_mov_b32 s5, s12 3537; GFX6-NEXT: s_mov_b32 s6, s11 3538; GFX6-NEXT: s_mov_b32 s7, s10 3539; GFX6-NEXT: v_mov_b32_e32 v0, s9 3540; GFX6-NEXT: v_mov_b32_e32 v2, s8 3541; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3542; GFX6-NEXT: v_mov_b32_e32 v1, v2 3543; GFX6-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 3544; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 3545; GFX6-NEXT: s_endpgm 3546; 3547; GFX7-LABEL: global_system_release_monotonic_cmpxchg: 3548; GFX7: ; %bb.0: ; %entry 3549; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3550; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3551; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3552; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3553; GFX7-NEXT: s_mov_b64 s[10:11], 16 3554; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3555; GFX7-NEXT: s_mov_b32 s4, s8 3556; GFX7-NEXT: s_mov_b32 s5, s9 3557; GFX7-NEXT: s_mov_b32 s9, s10 3558; GFX7-NEXT: s_mov_b32 s8, s11 3559; GFX7-NEXT: s_add_u32 s4, s4, s9 3560; GFX7-NEXT: s_addc_u32 s8, s5, s8 3561; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3562; GFX7-NEXT: s_mov_b32 s5, s8 3563; GFX7-NEXT: v_mov_b32_e32 v2, s7 3564; GFX7-NEXT: v_mov_b32_e32 v0, s6 3565; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3566; GFX7-NEXT: v_mov_b32_e32 v3, v0 3567; GFX7-NEXT: v_mov_b32_e32 v0, s4 3568; GFX7-NEXT: v_mov_b32_e32 v1, s5 3569; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3570; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3571; GFX7-NEXT: s_endpgm 3572; 3573; GFX10-WGP-LABEL: global_system_release_monotonic_cmpxchg: 3574; GFX10-WGP: ; %bb.0: ; %entry 3575; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3576; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3577; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 3578; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 3579; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3580; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 3581; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 3582; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3583; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 3584; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3585; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3586; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3587; GFX10-WGP-NEXT: s_endpgm 3588; 3589; GFX10-CU-LABEL: 
global_system_release_monotonic_cmpxchg: 3590; GFX10-CU: ; %bb.0: ; %entry 3591; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3592; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3593; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 3594; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 3595; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3596; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 3597; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 3598; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3599; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 3600; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3601; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3602; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3603; GFX10-CU-NEXT: s_endpgm 3604; 3605; SKIP-CACHE-INV-LABEL: global_system_release_monotonic_cmpxchg: 3606; SKIP-CACHE-INV: ; %bb.0: ; %entry 3607; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 3608; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 3609; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 3610; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 3611; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3612; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 3613; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 3614; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 3615; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 3616; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 3617; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 3618; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 3619; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 3620; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 3621; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 3622; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3623; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 3624; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3625; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 3626; SKIP-CACHE-INV-NEXT: s_endpgm 3627; 3628; GFX90A-NOTTGSPLIT-LABEL: 
global_system_release_monotonic_cmpxchg: 3629; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3630; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3631; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3632; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3633; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3634; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3635; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3636; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3637; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3638; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3639; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 3640; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3641; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3642; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3643; 3644; GFX90A-TGSPLIT-LABEL: global_system_release_monotonic_cmpxchg: 3645; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3646; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3647; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3648; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3649; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3650; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3651; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3652; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3653; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3654; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3655; GFX90A-TGSPLIT-NEXT: buffer_wbl2 3656; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3657; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3658; GFX90A-TGSPLIT-NEXT: s_endpgm 3659; 3660; GFX940-NOTTGSPLIT-LABEL: global_system_release_monotonic_cmpxchg: 3661; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3662; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3663; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3664; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3665; GFX940-NOTTGSPLIT-NEXT: 
s_load_dword s2, s[4:5], 0xc 3666; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3667; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3668; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3669; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3670; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3671; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 3672; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3673; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 3674; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3675; 3676; GFX940-TGSPLIT-LABEL: global_system_release_monotonic_cmpxchg: 3677; GFX940-TGSPLIT: ; %bb.0: ; %entry 3678; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3679; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3680; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3681; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3682; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3683; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3684; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3685; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3686; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3687; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 3688; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3689; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 3690; GFX940-TGSPLIT-NEXT: s_endpgm 3691; 3692; GFX11-WGP-LABEL: global_system_release_monotonic_cmpxchg: 3693; GFX11-WGP: ; %bb.0: ; %entry 3694; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 3695; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3696; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3697; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3698; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3699; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 3700; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 3701; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3702; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 3703; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 3704; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3705; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3706; GFX11-WGP-NEXT: s_endpgm 3707; 3708; GFX11-CU-LABEL: global_system_release_monotonic_cmpxchg: 3709; GFX11-CU: ; %bb.0: ; %entry 3710; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3711; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3712; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3713; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3714; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3715; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 3716; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 3717; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3718; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 3719; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3720; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 3721; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3722; GFX11-CU-NEXT: s_endpgm 3723; 3724; GFX12-WGP-LABEL: global_system_release_monotonic_cmpxchg: 3725; GFX12-WGP: ; %bb.0: ; %entry 3726; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3727; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3728; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3729; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3730; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3731; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 3732; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 3733; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3734; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 3735; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 3736; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 3737; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 3738; GFX12-WGP-NEXT: s_wait_storecnt 0x0 3739; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 3740; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 3741; GFX12-WGP-NEXT: s_endpgm 3742; 3743; GFX12-CU-LABEL: global_system_release_monotonic_cmpxchg: 3744; GFX12-CU: ; %bb.0: ; %entry 3745; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 3746; GFX12-CU-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x0 3747; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3748; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3749; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3750; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 3751; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 3752; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3753; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 3754; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 3755; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 3756; GFX12-CU-NEXT: s_wait_samplecnt 0x0 3757; GFX12-CU-NEXT: s_wait_storecnt 0x0 3758; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 3759; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 3760; GFX12-CU-NEXT: s_endpgm 3761 ptr addrspace(1) %out, i32 %in, i32 %old) { 3762entry: 3763 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 3764 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in release monotonic 3765 ret void 3766} 3767 3768define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( 3769; GFX6-LABEL: global_system_acq_rel_monotonic_cmpxchg: 3770; GFX6: ; %bb.0: ; %entry 3771; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3772; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3773; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3774; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3775; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3776; GFX6-NEXT: s_mov_b32 s12, s5 3777; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 3778; GFX6-NEXT: s_mov_b32 s10, 0x100f000 3779; GFX6-NEXT: s_mov_b32 s11, -1 3780; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 3781; GFX6-NEXT: s_mov_b32 s5, s12 3782; GFX6-NEXT: s_mov_b32 s6, s11 3783; GFX6-NEXT: s_mov_b32 s7, s10 3784; GFX6-NEXT: v_mov_b32_e32 v0, s9 3785; GFX6-NEXT: v_mov_b32_e32 v2, s8 3786; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3787; GFX6-NEXT: v_mov_b32_e32 v1, v2 3788; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3789; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 3790; 
GFX6-NEXT: s_waitcnt vmcnt(0) 3791; GFX6-NEXT: buffer_wbinvl1 3792; GFX6-NEXT: s_endpgm 3793; 3794; GFX7-LABEL: global_system_acq_rel_monotonic_cmpxchg: 3795; GFX7: ; %bb.0: ; %entry 3796; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3797; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3798; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3799; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3800; GFX7-NEXT: s_mov_b64 s[10:11], 16 3801; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3802; GFX7-NEXT: s_mov_b32 s4, s8 3803; GFX7-NEXT: s_mov_b32 s5, s9 3804; GFX7-NEXT: s_mov_b32 s9, s10 3805; GFX7-NEXT: s_mov_b32 s8, s11 3806; GFX7-NEXT: s_add_u32 s4, s4, s9 3807; GFX7-NEXT: s_addc_u32 s8, s5, s8 3808; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3809; GFX7-NEXT: s_mov_b32 s5, s8 3810; GFX7-NEXT: v_mov_b32_e32 v2, s7 3811; GFX7-NEXT: v_mov_b32_e32 v0, s6 3812; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3813; GFX7-NEXT: v_mov_b32_e32 v3, v0 3814; GFX7-NEXT: v_mov_b32_e32 v0, s4 3815; GFX7-NEXT: v_mov_b32_e32 v1, s5 3816; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3817; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3818; GFX7-NEXT: s_waitcnt vmcnt(0) 3819; GFX7-NEXT: buffer_wbinvl1_vol 3820; GFX7-NEXT: s_endpgm 3821; 3822; GFX10-WGP-LABEL: global_system_acq_rel_monotonic_cmpxchg: 3823; GFX10-WGP: ; %bb.0: ; %entry 3824; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3825; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3826; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 3827; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 3828; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3829; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 3830; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 3831; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3832; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 3833; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3834; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3835; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3836; GFX10-WGP-NEXT: 
s_waitcnt_vscnt null, 0x0 3837; GFX10-WGP-NEXT: buffer_gl1_inv 3838; GFX10-WGP-NEXT: buffer_gl0_inv 3839; GFX10-WGP-NEXT: s_endpgm 3840; 3841; GFX10-CU-LABEL: global_system_acq_rel_monotonic_cmpxchg: 3842; GFX10-CU: ; %bb.0: ; %entry 3843; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3844; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3845; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 3846; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 3847; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3848; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 3849; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 3850; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3851; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 3852; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3853; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3854; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3855; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3856; GFX10-CU-NEXT: buffer_gl1_inv 3857; GFX10-CU-NEXT: buffer_gl0_inv 3858; GFX10-CU-NEXT: s_endpgm 3859; 3860; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_cmpxchg: 3861; SKIP-CACHE-INV: ; %bb.0: ; %entry 3862; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 3863; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 3864; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 3865; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 3866; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3867; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 3868; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 3869; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 3870; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 3871; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 3872; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 3873; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 3874; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 3875; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 3876; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 3877; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def 
$vgpr0_vgpr1 killed $exec 3878; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 3879; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3880; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 3881; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3882; SKIP-CACHE-INV-NEXT: s_endpgm 3883; 3884; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg: 3885; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3886; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3887; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3888; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3889; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3890; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3891; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3892; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3893; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3894; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3895; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 3896; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3897; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3898; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3899; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 3900; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3901; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3902; 3903; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg: 3904; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3905; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3906; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3907; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3908; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3909; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3910; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3911; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3912; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3913; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3914; GFX90A-TGSPLIT-NEXT: buffer_wbl2 3915; GFX90A-TGSPLIT-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 3916; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3917; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3918; GFX90A-TGSPLIT-NEXT: buffer_invl2 3919; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3920; GFX90A-TGSPLIT-NEXT: s_endpgm 3921; 3922; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg: 3923; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3924; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3925; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3926; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3927; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3928; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3929; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3930; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3931; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3932; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3933; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 3934; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3935; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 3936; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 3937; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 3938; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3939; 3940; GFX940-TGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg: 3941; GFX940-TGSPLIT: ; %bb.0: ; %entry 3942; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3943; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3944; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3945; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3946; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3947; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3948; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3949; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3950; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3951; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 3952; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3953; 
GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 3954; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3955; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 3956; GFX940-TGSPLIT-NEXT: s_endpgm 3957; 3958; GFX11-WGP-LABEL: global_system_acq_rel_monotonic_cmpxchg: 3959; GFX11-WGP: ; %bb.0: ; %entry 3960; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 3961; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3962; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3963; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3964; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3965; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 3966; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 3967; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3968; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 3969; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3970; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3971; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3972; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3973; GFX11-WGP-NEXT: buffer_gl1_inv 3974; GFX11-WGP-NEXT: buffer_gl0_inv 3975; GFX11-WGP-NEXT: s_endpgm 3976; 3977; GFX11-CU-LABEL: global_system_acq_rel_monotonic_cmpxchg: 3978; GFX11-CU: ; %bb.0: ; %entry 3979; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3980; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3981; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3982; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3983; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3984; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 3985; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 3986; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3987; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 3988; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3989; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 3990; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3991; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 3992; GFX11-CU-NEXT: buffer_gl1_inv 3993; GFX11-CU-NEXT: buffer_gl0_inv 3994; GFX11-CU-NEXT: s_endpgm 3995; 3996; GFX12-WGP-LABEL: 
global_system_acq_rel_monotonic_cmpxchg: 3997; GFX12-WGP: ; %bb.0: ; %entry 3998; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3999; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4000; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4001; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4002; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4003; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 4004; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 4005; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4006; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 4007; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 4008; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 4009; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 4010; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4011; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 4012; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 4013; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4014; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 4015; GFX12-WGP-NEXT: s_endpgm 4016; 4017; GFX12-CU-LABEL: global_system_acq_rel_monotonic_cmpxchg: 4018; GFX12-CU: ; %bb.0: ; %entry 4019; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 4020; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4021; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4022; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4023; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4024; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 4025; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 4026; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4027; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 4028; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 4029; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 4030; GFX12-CU-NEXT: s_wait_samplecnt 0x0 4031; GFX12-CU-NEXT: s_wait_storecnt 0x0 4032; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 4033; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 4034; GFX12-CU-NEXT: s_wait_storecnt 0x0 4035; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 4036; GFX12-CU-NEXT: s_endpgm 4037 ptr addrspace(1) %out, i32 %in, i32 %old) { 4038entry: 4039 %gep = 
getelementptr i32, ptr addrspace(1) %out, i32 4 4040 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel monotonic 4041 ret void 4042} 4043 4044define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( 4045; GFX6-LABEL: global_system_seq_cst_monotonic_cmpxchg: 4046; GFX6: ; %bb.0: ; %entry 4047; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 4048; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 4049; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 4050; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 4051; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4052; GFX6-NEXT: s_mov_b32 s12, s5 4053; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 4054; GFX6-NEXT: s_mov_b32 s10, 0x100f000 4055; GFX6-NEXT: s_mov_b32 s11, -1 4056; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 4057; GFX6-NEXT: s_mov_b32 s5, s12 4058; GFX6-NEXT: s_mov_b32 s6, s11 4059; GFX6-NEXT: s_mov_b32 s7, s10 4060; GFX6-NEXT: v_mov_b32_e32 v0, s9 4061; GFX6-NEXT: v_mov_b32_e32 v2, s8 4062; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4063; GFX6-NEXT: v_mov_b32_e32 v1, v2 4064; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4065; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4066; GFX6-NEXT: s_waitcnt vmcnt(0) 4067; GFX6-NEXT: buffer_wbinvl1 4068; GFX6-NEXT: s_endpgm 4069; 4070; GFX7-LABEL: global_system_seq_cst_monotonic_cmpxchg: 4071; GFX7: ; %bb.0: ; %entry 4072; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4073; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4074; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4075; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4076; GFX7-NEXT: s_mov_b64 s[10:11], 16 4077; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4078; GFX7-NEXT: s_mov_b32 s4, s8 4079; GFX7-NEXT: s_mov_b32 s5, s9 4080; GFX7-NEXT: s_mov_b32 s9, s10 4081; GFX7-NEXT: s_mov_b32 s8, s11 4082; GFX7-NEXT: s_add_u32 s4, s4, s9 4083; GFX7-NEXT: s_addc_u32 s8, s5, s8 4084; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4085; GFX7-NEXT: s_mov_b32 s5, s8 4086; 
GFX7-NEXT: v_mov_b32_e32 v2, s7 4087; GFX7-NEXT: v_mov_b32_e32 v0, s6 4088; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4089; GFX7-NEXT: v_mov_b32_e32 v3, v0 4090; GFX7-NEXT: v_mov_b32_e32 v0, s4 4091; GFX7-NEXT: v_mov_b32_e32 v1, s5 4092; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4093; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4094; GFX7-NEXT: s_waitcnt vmcnt(0) 4095; GFX7-NEXT: buffer_wbinvl1_vol 4096; GFX7-NEXT: s_endpgm 4097; 4098; GFX10-WGP-LABEL: global_system_seq_cst_monotonic_cmpxchg: 4099; GFX10-WGP: ; %bb.0: ; %entry 4100; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4101; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4102; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 4103; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 4104; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4105; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 4106; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 4107; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4108; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 4109; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4110; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4111; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4112; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4113; GFX10-WGP-NEXT: buffer_gl1_inv 4114; GFX10-WGP-NEXT: buffer_gl0_inv 4115; GFX10-WGP-NEXT: s_endpgm 4116; 4117; GFX10-CU-LABEL: global_system_seq_cst_monotonic_cmpxchg: 4118; GFX10-CU: ; %bb.0: ; %entry 4119; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4120; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4121; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 4122; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 4123; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4124; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 4125; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 4126; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4127; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 4128; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4129; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 
4130; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4131; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4132; GFX10-CU-NEXT: buffer_gl1_inv 4133; GFX10-CU-NEXT: buffer_gl0_inv 4134; GFX10-CU-NEXT: s_endpgm 4135; 4136; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_cmpxchg: 4137; SKIP-CACHE-INV: ; %bb.0: ; %entry 4138; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 4139; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 4140; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 4141; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 4142; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4143; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 4144; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 4145; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 4146; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 4147; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 4148; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 4149; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 4150; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 4151; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 4152; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 4153; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4154; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 4155; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4156; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 4157; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4158; SKIP-CACHE-INV-NEXT: s_endpgm 4159; 4160; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg: 4161; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4162; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4163; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4164; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4165; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4166; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4167; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4168; GFX90A-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v1, s6 4169; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4170; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4171; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 4172; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4173; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4174; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4175; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 4176; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4177; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4178; 4179; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg: 4180; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4181; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4182; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4183; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4184; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4185; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4186; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4187; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4188; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4189; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4190; GFX90A-TGSPLIT-NEXT: buffer_wbl2 4191; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4192; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4193; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4194; GFX90A-TGSPLIT-NEXT: buffer_invl2 4195; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4196; GFX90A-TGSPLIT-NEXT: s_endpgm 4197; 4198; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg: 4199; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4200; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4201; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4202; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4203; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4204; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4205; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4206; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 
4207; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4208; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4209; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 4210; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4211; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 4212; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4213; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 4214; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4215; 4216; GFX940-TGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg: 4217; GFX940-TGSPLIT: ; %bb.0: ; %entry 4218; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4219; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4220; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4221; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4222; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4223; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4224; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4225; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4226; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4227; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 4228; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4229; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 4230; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4231; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 4232; GFX940-TGSPLIT-NEXT: s_endpgm 4233; 4234; GFX11-WGP-LABEL: global_system_seq_cst_monotonic_cmpxchg: 4235; GFX11-WGP: ; %bb.0: ; %entry 4236; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 4237; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4238; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4239; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4240; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4241; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 4242; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 4243; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4244; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 4245; GFX11-WGP-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) 4246; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4247; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4248; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4249; GFX11-WGP-NEXT: buffer_gl1_inv 4250; GFX11-WGP-NEXT: buffer_gl0_inv 4251; GFX11-WGP-NEXT: s_endpgm 4252; 4253; GFX11-CU-LABEL: global_system_seq_cst_monotonic_cmpxchg: 4254; GFX11-CU: ; %bb.0: ; %entry 4255; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 4256; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4257; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4258; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4259; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4260; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 4261; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 4262; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4263; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 4264; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4265; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4266; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4267; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4268; GFX11-CU-NEXT: buffer_gl1_inv 4269; GFX11-CU-NEXT: buffer_gl0_inv 4270; GFX11-CU-NEXT: s_endpgm 4271; 4272; GFX12-WGP-LABEL: global_system_seq_cst_monotonic_cmpxchg: 4273; GFX12-WGP: ; %bb.0: ; %entry 4274; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 4275; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4276; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4277; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4278; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4279; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 4280; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 4281; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4282; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 4283; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 4284; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 4285; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 4286; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4287; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 4288; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], 
s[0:1] offset:16 scope:SCOPE_SYS 4289; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4290; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 4291; GFX12-WGP-NEXT: s_endpgm 4292; 4293; GFX12-CU-LABEL: global_system_seq_cst_monotonic_cmpxchg: 4294; GFX12-CU: ; %bb.0: ; %entry 4295; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 4296; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4297; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4298; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4299; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4300; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 4301; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 4302; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4303; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 4304; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 4305; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 4306; GFX12-CU-NEXT: s_wait_samplecnt 0x0 4307; GFX12-CU-NEXT: s_wait_storecnt 0x0 4308; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 4309; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 4310; GFX12-CU-NEXT: s_wait_storecnt 0x0 4311; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 4312; GFX12-CU-NEXT: s_endpgm 4313 ptr addrspace(1) %out, i32 %in, i32 %old) { 4314entry: 4315 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 4316 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst monotonic 4317 ret void 4318} 4319 4320define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg( 4321; GFX6-LABEL: global_system_monotonic_acquire_cmpxchg: 4322; GFX6: ; %bb.0: ; %entry 4323; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 4324; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 4325; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 4326; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 4327; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4328; GFX6-NEXT: s_mov_b32 s12, s5 4329; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 4330; GFX6-NEXT: s_mov_b32 s10, 0x100f000 4331; GFX6-NEXT: s_mov_b32 s11, -1 4332; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 
4333; GFX6-NEXT: s_mov_b32 s5, s12 4334; GFX6-NEXT: s_mov_b32 s6, s11 4335; GFX6-NEXT: s_mov_b32 s7, s10 4336; GFX6-NEXT: v_mov_b32_e32 v0, s9 4337; GFX6-NEXT: v_mov_b32_e32 v2, s8 4338; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4339; GFX6-NEXT: v_mov_b32_e32 v1, v2 4340; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4341; GFX6-NEXT: s_waitcnt vmcnt(0) 4342; GFX6-NEXT: buffer_wbinvl1 4343; GFX6-NEXT: s_endpgm 4344; 4345; GFX7-LABEL: global_system_monotonic_acquire_cmpxchg: 4346; GFX7: ; %bb.0: ; %entry 4347; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4348; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4349; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4350; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4351; GFX7-NEXT: s_mov_b64 s[10:11], 16 4352; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4353; GFX7-NEXT: s_mov_b32 s4, s8 4354; GFX7-NEXT: s_mov_b32 s5, s9 4355; GFX7-NEXT: s_mov_b32 s9, s10 4356; GFX7-NEXT: s_mov_b32 s8, s11 4357; GFX7-NEXT: s_add_u32 s4, s4, s9 4358; GFX7-NEXT: s_addc_u32 s8, s5, s8 4359; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4360; GFX7-NEXT: s_mov_b32 s5, s8 4361; GFX7-NEXT: v_mov_b32_e32 v2, s7 4362; GFX7-NEXT: v_mov_b32_e32 v0, s6 4363; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4364; GFX7-NEXT: v_mov_b32_e32 v3, v0 4365; GFX7-NEXT: v_mov_b32_e32 v0, s4 4366; GFX7-NEXT: v_mov_b32_e32 v1, s5 4367; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4368; GFX7-NEXT: s_waitcnt vmcnt(0) 4369; GFX7-NEXT: buffer_wbinvl1_vol 4370; GFX7-NEXT: s_endpgm 4371; 4372; GFX10-WGP-LABEL: global_system_monotonic_acquire_cmpxchg: 4373; GFX10-WGP: ; %bb.0: ; %entry 4374; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4375; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4376; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 4377; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 4378; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4379; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 4380; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 
4381; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4382; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 4383; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4384; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4385; GFX10-WGP-NEXT: buffer_gl1_inv 4386; GFX10-WGP-NEXT: buffer_gl0_inv 4387; GFX10-WGP-NEXT: s_endpgm 4388; 4389; GFX10-CU-LABEL: global_system_monotonic_acquire_cmpxchg: 4390; GFX10-CU: ; %bb.0: ; %entry 4391; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4392; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4393; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 4394; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 4395; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4396; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 4397; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 4398; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4399; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 4400; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4401; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4402; GFX10-CU-NEXT: buffer_gl1_inv 4403; GFX10-CU-NEXT: buffer_gl0_inv 4404; GFX10-CU-NEXT: s_endpgm 4405; 4406; SKIP-CACHE-INV-LABEL: global_system_monotonic_acquire_cmpxchg: 4407; SKIP-CACHE-INV: ; %bb.0: ; %entry 4408; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 4409; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 4410; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 4411; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 4412; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4413; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 4414; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 4415; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 4416; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 4417; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 4418; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 4419; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 4420; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 4421; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, 
s5 4422; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 4423; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4424; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 4425; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 4426; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4427; SKIP-CACHE-INV-NEXT: s_endpgm 4428; 4429; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_acquire_cmpxchg: 4430; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4431; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4432; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4433; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4434; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4435; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4436; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4437; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4438; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4439; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4440; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4441; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4442; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 4443; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4444; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4445; 4446; GFX90A-TGSPLIT-LABEL: global_system_monotonic_acquire_cmpxchg: 4447; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4448; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4449; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4450; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4451; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4452; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4453; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4454; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4455; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4456; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4457; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4458; GFX90A-TGSPLIT-NEXT: s_waitcnt 
vmcnt(0) 4459; GFX90A-TGSPLIT-NEXT: buffer_invl2 4460; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4461; GFX90A-TGSPLIT-NEXT: s_endpgm 4462; 4463; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_acquire_cmpxchg: 4464; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4465; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4466; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4467; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4468; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4469; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4470; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4471; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4472; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4473; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4474; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 4475; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4476; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 4477; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4478; 4479; GFX940-TGSPLIT-LABEL: global_system_monotonic_acquire_cmpxchg: 4480; GFX940-TGSPLIT: ; %bb.0: ; %entry 4481; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4482; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4483; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4484; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4485; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4486; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4487; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4488; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4489; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4490; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 4491; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4492; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 4493; GFX940-TGSPLIT-NEXT: s_endpgm 4494; 4495; GFX11-WGP-LABEL: global_system_monotonic_acquire_cmpxchg: 4496; GFX11-WGP: ; %bb.0: ; %entry 4497; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 4498; 
GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4499; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4500; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4501; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4502; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 4503; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 4504; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4505; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 4506; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4507; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4508; GFX11-WGP-NEXT: buffer_gl1_inv 4509; GFX11-WGP-NEXT: buffer_gl0_inv 4510; GFX11-WGP-NEXT: s_endpgm 4511; 4512; GFX11-CU-LABEL: global_system_monotonic_acquire_cmpxchg: 4513; GFX11-CU: ; %bb.0: ; %entry 4514; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 4515; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4516; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4517; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4518; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4519; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 4520; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 4521; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4522; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 4523; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4524; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4525; GFX11-CU-NEXT: buffer_gl1_inv 4526; GFX11-CU-NEXT: buffer_gl0_inv 4527; GFX11-CU-NEXT: s_endpgm 4528; 4529; GFX12-WGP-LABEL: global_system_monotonic_acquire_cmpxchg: 4530; GFX12-WGP: ; %bb.0: ; %entry 4531; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 4532; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4533; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4534; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4535; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4536; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 4537; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 4538; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4539; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 4540; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 
v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 4541; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4542; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 4543; GFX12-WGP-NEXT: s_endpgm 4544; 4545; GFX12-CU-LABEL: global_system_monotonic_acquire_cmpxchg: 4546; GFX12-CU: ; %bb.0: ; %entry 4547; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 4548; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4549; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4550; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4551; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4552; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 4553; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 4554; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4555; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 4556; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 4557; GFX12-CU-NEXT: s_wait_storecnt 0x0 4558; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 4559; GFX12-CU-NEXT: s_endpgm 4560 ptr addrspace(1) %out, i32 %in, i32 %old) { 4561entry: 4562 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 4563 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic acquire 4564 ret void 4565} 4566 4567define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( 4568; GFX6-LABEL: global_system_acquire_acquire_cmpxchg: 4569; GFX6: ; %bb.0: ; %entry 4570; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 4571; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 4572; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 4573; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 4574; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4575; GFX6-NEXT: s_mov_b32 s12, s5 4576; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 4577; GFX6-NEXT: s_mov_b32 s10, 0x100f000 4578; GFX6-NEXT: s_mov_b32 s11, -1 4579; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 4580; GFX6-NEXT: s_mov_b32 s5, s12 4581; GFX6-NEXT: s_mov_b32 s6, s11 4582; GFX6-NEXT: s_mov_b32 s7, s10 4583; GFX6-NEXT: v_mov_b32_e32 v0, s9 4584; GFX6-NEXT: v_mov_b32_e32 v2, s8 4585; GFX6-NEXT: ; kill: 
def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4586; GFX6-NEXT: v_mov_b32_e32 v1, v2 4587; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4588; GFX6-NEXT: s_waitcnt vmcnt(0) 4589; GFX6-NEXT: buffer_wbinvl1 4590; GFX6-NEXT: s_endpgm 4591; 4592; GFX7-LABEL: global_system_acquire_acquire_cmpxchg: 4593; GFX7: ; %bb.0: ; %entry 4594; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4595; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4596; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4597; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4598; GFX7-NEXT: s_mov_b64 s[10:11], 16 4599; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4600; GFX7-NEXT: s_mov_b32 s4, s8 4601; GFX7-NEXT: s_mov_b32 s5, s9 4602; GFX7-NEXT: s_mov_b32 s9, s10 4603; GFX7-NEXT: s_mov_b32 s8, s11 4604; GFX7-NEXT: s_add_u32 s4, s4, s9 4605; GFX7-NEXT: s_addc_u32 s8, s5, s8 4606; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4607; GFX7-NEXT: s_mov_b32 s5, s8 4608; GFX7-NEXT: v_mov_b32_e32 v2, s7 4609; GFX7-NEXT: v_mov_b32_e32 v0, s6 4610; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4611; GFX7-NEXT: v_mov_b32_e32 v3, v0 4612; GFX7-NEXT: v_mov_b32_e32 v0, s4 4613; GFX7-NEXT: v_mov_b32_e32 v1, s5 4614; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4615; GFX7-NEXT: s_waitcnt vmcnt(0) 4616; GFX7-NEXT: buffer_wbinvl1_vol 4617; GFX7-NEXT: s_endpgm 4618; 4619; GFX10-WGP-LABEL: global_system_acquire_acquire_cmpxchg: 4620; GFX10-WGP: ; %bb.0: ; %entry 4621; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4622; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4623; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 4624; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 4625; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4626; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 4627; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 4628; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4629; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 4630; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4631; 
GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4632; GFX10-WGP-NEXT: buffer_gl1_inv 4633; GFX10-WGP-NEXT: buffer_gl0_inv 4634; GFX10-WGP-NEXT: s_endpgm 4635; 4636; GFX10-CU-LABEL: global_system_acquire_acquire_cmpxchg: 4637; GFX10-CU: ; %bb.0: ; %entry 4638; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4639; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4640; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 4641; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 4642; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4643; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 4644; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 4645; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4646; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 4647; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4648; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4649; GFX10-CU-NEXT: buffer_gl1_inv 4650; GFX10-CU-NEXT: buffer_gl0_inv 4651; GFX10-CU-NEXT: s_endpgm 4652; 4653; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_cmpxchg: 4654; SKIP-CACHE-INV: ; %bb.0: ; %entry 4655; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 4656; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 4657; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 4658; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 4659; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4660; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 4661; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 4662; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 4663; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 4664; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 4665; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 4666; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 4667; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 4668; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 4669; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 4670; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4671; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 4672; 
SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 4673; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4674; SKIP-CACHE-INV-NEXT: s_endpgm 4675; 4676; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_acquire_cmpxchg: 4677; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4678; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4679; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4680; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4681; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4682; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4683; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4684; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4685; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4686; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4687; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4688; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4689; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 4690; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4691; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4692; 4693; GFX90A-TGSPLIT-LABEL: global_system_acquire_acquire_cmpxchg: 4694; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4695; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4696; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4697; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4698; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4699; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4700; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4701; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4702; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4703; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4704; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4705; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4706; GFX90A-TGSPLIT-NEXT: buffer_invl2 4707; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4708; GFX90A-TGSPLIT-NEXT: s_endpgm 4709; 4710; GFX940-NOTTGSPLIT-LABEL: 
global_system_acquire_acquire_cmpxchg: 4711; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4712; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4713; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4714; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4715; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4716; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4717; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4718; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4719; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4720; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4721; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 4722; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4723; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 4724; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4725; 4726; GFX940-TGSPLIT-LABEL: global_system_acquire_acquire_cmpxchg: 4727; GFX940-TGSPLIT: ; %bb.0: ; %entry 4728; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4729; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4730; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4731; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4732; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4733; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4734; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4735; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4736; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4737; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 4738; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4739; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 4740; GFX940-TGSPLIT-NEXT: s_endpgm 4741; 4742; GFX11-WGP-LABEL: global_system_acquire_acquire_cmpxchg: 4743; GFX11-WGP: ; %bb.0: ; %entry 4744; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 4745; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4746; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4747; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4748; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 
4749; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 4750; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 4751; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4752; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 4753; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4754; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4755; GFX11-WGP-NEXT: buffer_gl1_inv 4756; GFX11-WGP-NEXT: buffer_gl0_inv 4757; GFX11-WGP-NEXT: s_endpgm 4758; 4759; GFX11-CU-LABEL: global_system_acquire_acquire_cmpxchg: 4760; GFX11-CU: ; %bb.0: ; %entry 4761; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 4762; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4763; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4764; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4765; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4766; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 4767; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 4768; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4769; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 4770; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4771; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4772; GFX11-CU-NEXT: buffer_gl1_inv 4773; GFX11-CU-NEXT: buffer_gl0_inv 4774; GFX11-CU-NEXT: s_endpgm 4775; 4776; GFX12-WGP-LABEL: global_system_acquire_acquire_cmpxchg: 4777; GFX12-WGP: ; %bb.0: ; %entry 4778; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 4779; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4780; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4781; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4782; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4783; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 4784; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 4785; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4786; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 4787; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 4788; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4789; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 4790; GFX12-WGP-NEXT: s_endpgm 4791; 4792; 
GFX12-CU-LABEL: global_system_acquire_acquire_cmpxchg: 4793; GFX12-CU: ; %bb.0: ; %entry 4794; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 4795; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4796; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4797; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4798; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4799; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 4800; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 4801; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4802; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 4803; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 4804; GFX12-CU-NEXT: s_wait_storecnt 0x0 4805; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 4806; GFX12-CU-NEXT: s_endpgm 4807 ptr addrspace(1) %out, i32 %in, i32 %old) { 4808entry: 4809 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 4810 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire acquire 4811 ret void 4812} 4813 4814define amdgpu_kernel void @global_system_release_acquire_cmpxchg( 4815; GFX6-LABEL: global_system_release_acquire_cmpxchg: 4816; GFX6: ; %bb.0: ; %entry 4817; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 4818; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 4819; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 4820; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 4821; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4822; GFX6-NEXT: s_mov_b32 s12, s5 4823; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 4824; GFX6-NEXT: s_mov_b32 s10, 0x100f000 4825; GFX6-NEXT: s_mov_b32 s11, -1 4826; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 4827; GFX6-NEXT: s_mov_b32 s5, s12 4828; GFX6-NEXT: s_mov_b32 s6, s11 4829; GFX6-NEXT: s_mov_b32 s7, s10 4830; GFX6-NEXT: v_mov_b32_e32 v0, s9 4831; GFX6-NEXT: v_mov_b32_e32 v2, s8 4832; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4833; GFX6-NEXT: v_mov_b32_e32 v1, v2 4834; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4835; GFX6-NEXT: buffer_atomic_cmpswap 
v[0:1], off, s[4:7], 0 offset:16 4836; GFX6-NEXT: s_waitcnt vmcnt(0) 4837; GFX6-NEXT: buffer_wbinvl1 4838; GFX6-NEXT: s_endpgm 4839; 4840; GFX7-LABEL: global_system_release_acquire_cmpxchg: 4841; GFX7: ; %bb.0: ; %entry 4842; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4843; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4844; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4845; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4846; GFX7-NEXT: s_mov_b64 s[10:11], 16 4847; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4848; GFX7-NEXT: s_mov_b32 s4, s8 4849; GFX7-NEXT: s_mov_b32 s5, s9 4850; GFX7-NEXT: s_mov_b32 s9, s10 4851; GFX7-NEXT: s_mov_b32 s8, s11 4852; GFX7-NEXT: s_add_u32 s4, s4, s9 4853; GFX7-NEXT: s_addc_u32 s8, s5, s8 4854; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4855; GFX7-NEXT: s_mov_b32 s5, s8 4856; GFX7-NEXT: v_mov_b32_e32 v2, s7 4857; GFX7-NEXT: v_mov_b32_e32 v0, s6 4858; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4859; GFX7-NEXT: v_mov_b32_e32 v3, v0 4860; GFX7-NEXT: v_mov_b32_e32 v0, s4 4861; GFX7-NEXT: v_mov_b32_e32 v1, s5 4862; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4863; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4864; GFX7-NEXT: s_waitcnt vmcnt(0) 4865; GFX7-NEXT: buffer_wbinvl1_vol 4866; GFX7-NEXT: s_endpgm 4867; 4868; GFX10-WGP-LABEL: global_system_release_acquire_cmpxchg: 4869; GFX10-WGP: ; %bb.0: ; %entry 4870; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4871; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4872; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 4873; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 4874; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4875; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 4876; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 4877; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4878; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 4879; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4880; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4881; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] 
offset:16 4882; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4883; GFX10-WGP-NEXT: buffer_gl1_inv 4884; GFX10-WGP-NEXT: buffer_gl0_inv 4885; GFX10-WGP-NEXT: s_endpgm 4886; 4887; GFX10-CU-LABEL: global_system_release_acquire_cmpxchg: 4888; GFX10-CU: ; %bb.0: ; %entry 4889; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4890; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4891; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 4892; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 4893; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4894; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 4895; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 4896; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4897; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 4898; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4899; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4900; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4901; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4902; GFX10-CU-NEXT: buffer_gl1_inv 4903; GFX10-CU-NEXT: buffer_gl0_inv 4904; GFX10-CU-NEXT: s_endpgm 4905; 4906; SKIP-CACHE-INV-LABEL: global_system_release_acquire_cmpxchg: 4907; SKIP-CACHE-INV: ; %bb.0: ; %entry 4908; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 4909; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 4910; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 4911; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 4912; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4913; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 4914; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 4915; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 4916; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 4917; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 4918; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 4919; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 4920; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 4921; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 4922; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 4923; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 
killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4924; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 4925; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4926; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 4927; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4928; SKIP-CACHE-INV-NEXT: s_endpgm 4929; 4930; GFX90A-NOTTGSPLIT-LABEL: global_system_release_acquire_cmpxchg: 4931; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4932; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4933; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4934; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4935; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4936; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4937; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4938; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4939; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4940; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4941; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 4942; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4943; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4944; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4945; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 4946; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4947; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4948; 4949; GFX90A-TGSPLIT-LABEL: global_system_release_acquire_cmpxchg: 4950; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4951; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4952; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4953; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4954; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4955; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4956; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4957; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4958; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4959; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4960; GFX90A-TGSPLIT-NEXT: buffer_wbl2 4961; 
GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4962; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4963; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4964; GFX90A-TGSPLIT-NEXT: buffer_invl2 4965; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4966; GFX90A-TGSPLIT-NEXT: s_endpgm 4967; 4968; GFX940-NOTTGSPLIT-LABEL: global_system_release_acquire_cmpxchg: 4969; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4970; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4971; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4972; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4973; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4974; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4975; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4976; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4977; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4978; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4979; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 4980; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4981; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 4982; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 4983; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 4984; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4985; 4986; GFX940-TGSPLIT-LABEL: global_system_release_acquire_cmpxchg: 4987; GFX940-TGSPLIT: ; %bb.0: ; %entry 4988; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4989; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4990; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4991; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4992; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4993; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4994; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4995; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4996; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4997; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 4998; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 
4999; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 5000; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5001; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 5002; GFX940-TGSPLIT-NEXT: s_endpgm 5003; 5004; GFX11-WGP-LABEL: global_system_release_acquire_cmpxchg: 5005; GFX11-WGP: ; %bb.0: ; %entry 5006; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 5007; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5008; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5009; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5010; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5011; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 5012; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 5013; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5014; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 5015; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5016; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5017; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5018; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5019; GFX11-WGP-NEXT: buffer_gl1_inv 5020; GFX11-WGP-NEXT: buffer_gl0_inv 5021; GFX11-WGP-NEXT: s_endpgm 5022; 5023; GFX11-CU-LABEL: global_system_release_acquire_cmpxchg: 5024; GFX11-CU: ; %bb.0: ; %entry 5025; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 5026; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5027; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5028; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5029; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5030; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 5031; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 5032; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5033; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 5034; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5035; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5036; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5037; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5038; GFX11-CU-NEXT: buffer_gl1_inv 5039; GFX11-CU-NEXT: buffer_gl0_inv 5040; GFX11-CU-NEXT: s_endpgm 5041; 5042; GFX12-WGP-LABEL: 
global_system_release_acquire_cmpxchg: 5043; GFX12-WGP: ; %bb.0: ; %entry 5044; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 5045; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5046; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5047; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5048; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5049; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 5050; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 5051; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5052; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 5053; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 5054; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 5055; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 5056; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5057; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 5058; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 5059; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5060; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 5061; GFX12-WGP-NEXT: s_endpgm 5062; 5063; GFX12-CU-LABEL: global_system_release_acquire_cmpxchg: 5064; GFX12-CU: ; %bb.0: ; %entry 5065; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 5066; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5067; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5068; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5069; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5070; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 5071; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 5072; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5073; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 5074; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 5075; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 5076; GFX12-CU-NEXT: s_wait_samplecnt 0x0 5077; GFX12-CU-NEXT: s_wait_storecnt 0x0 5078; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 5079; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 5080; GFX12-CU-NEXT: s_wait_storecnt 0x0 5081; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 5082; GFX12-CU-NEXT: s_endpgm 5083 ptr addrspace(1) %out, i32 %in, i32 %old) { 5084entry: 5085 %gep = 
getelementptr i32, ptr addrspace(1) %out, i32 4 5086 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in release acquire 5087 ret void 5088} 5089 5090define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( 5091; GFX6-LABEL: global_system_acq_rel_acquire_cmpxchg: 5092; GFX6: ; %bb.0: ; %entry 5093; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 5094; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5095; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 5096; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 5097; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5098; GFX6-NEXT: s_mov_b32 s12, s5 5099; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 5100; GFX6-NEXT: s_mov_b32 s10, 0x100f000 5101; GFX6-NEXT: s_mov_b32 s11, -1 5102; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 5103; GFX6-NEXT: s_mov_b32 s5, s12 5104; GFX6-NEXT: s_mov_b32 s6, s11 5105; GFX6-NEXT: s_mov_b32 s7, s10 5106; GFX6-NEXT: v_mov_b32_e32 v0, s9 5107; GFX6-NEXT: v_mov_b32_e32 v2, s8 5108; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5109; GFX6-NEXT: v_mov_b32_e32 v1, v2 5110; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5111; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5112; GFX6-NEXT: s_waitcnt vmcnt(0) 5113; GFX6-NEXT: buffer_wbinvl1 5114; GFX6-NEXT: s_endpgm 5115; 5116; GFX7-LABEL: global_system_acq_rel_acquire_cmpxchg: 5117; GFX7: ; %bb.0: ; %entry 5118; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5119; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5120; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5121; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5122; GFX7-NEXT: s_mov_b64 s[10:11], 16 5123; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5124; GFX7-NEXT: s_mov_b32 s4, s8 5125; GFX7-NEXT: s_mov_b32 s5, s9 5126; GFX7-NEXT: s_mov_b32 s9, s10 5127; GFX7-NEXT: s_mov_b32 s8, s11 5128; GFX7-NEXT: s_add_u32 s4, s4, s9 5129; GFX7-NEXT: s_addc_u32 s8, s5, s8 5130; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5131; GFX7-NEXT: s_mov_b32 s5, s8 5132; 
GFX7-NEXT: v_mov_b32_e32 v2, s7 5133; GFX7-NEXT: v_mov_b32_e32 v0, s6 5134; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5135; GFX7-NEXT: v_mov_b32_e32 v3, v0 5136; GFX7-NEXT: v_mov_b32_e32 v0, s4 5137; GFX7-NEXT: v_mov_b32_e32 v1, s5 5138; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5139; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5140; GFX7-NEXT: s_waitcnt vmcnt(0) 5141; GFX7-NEXT: buffer_wbinvl1_vol 5142; GFX7-NEXT: s_endpgm 5143; 5144; GFX10-WGP-LABEL: global_system_acq_rel_acquire_cmpxchg: 5145; GFX10-WGP: ; %bb.0: ; %entry 5146; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5147; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5148; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 5149; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 5150; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5151; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 5152; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 5153; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5154; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 5155; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5156; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5157; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5158; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5159; GFX10-WGP-NEXT: buffer_gl1_inv 5160; GFX10-WGP-NEXT: buffer_gl0_inv 5161; GFX10-WGP-NEXT: s_endpgm 5162; 5163; GFX10-CU-LABEL: global_system_acq_rel_acquire_cmpxchg: 5164; GFX10-CU: ; %bb.0: ; %entry 5165; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5166; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5167; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 5168; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 5169; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5170; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 5171; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 5172; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5173; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 5174; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5175; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 
5176; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5177; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5178; GFX10-CU-NEXT: buffer_gl1_inv 5179; GFX10-CU-NEXT: buffer_gl0_inv 5180; GFX10-CU-NEXT: s_endpgm 5181; 5182; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_cmpxchg: 5183; SKIP-CACHE-INV: ; %bb.0: ; %entry 5184; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 5185; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 5186; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 5187; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 5188; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5189; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 5190; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 5191; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 5192; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 5193; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 5194; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 5195; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 5196; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 5197; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 5198; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 5199; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5200; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 5201; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5202; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5203; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5204; SKIP-CACHE-INV-NEXT: s_endpgm 5205; 5206; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg: 5207; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5208; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5209; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5210; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5211; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5212; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5213; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5214; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 
v1, s6 5215; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5216; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5217; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 5218; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5219; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5220; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5221; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5222; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5223; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5224; 5225; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg: 5226; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5227; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5228; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5229; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5230; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5231; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5232; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5233; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5234; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5235; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5236; GFX90A-TGSPLIT-NEXT: buffer_wbl2 5237; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5238; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5239; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5240; GFX90A-TGSPLIT-NEXT: buffer_invl2 5241; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5242; GFX90A-TGSPLIT-NEXT: s_endpgm 5243; 5244; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg: 5245; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5246; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5247; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5248; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5249; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5250; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5251; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5252; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5253; 
GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5254; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5255; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5256; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5257; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 5258; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5259; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 5260; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5261; 5262; GFX940-TGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg: 5263; GFX940-TGSPLIT: ; %bb.0: ; %entry 5264; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5265; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5266; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5267; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5268; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5269; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5270; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5271; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5272; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5273; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5274; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5275; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 5276; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5277; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 5278; GFX940-TGSPLIT-NEXT: s_endpgm 5279; 5280; GFX11-WGP-LABEL: global_system_acq_rel_acquire_cmpxchg: 5281; GFX11-WGP: ; %bb.0: ; %entry 5282; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 5283; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5284; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5285; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5286; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5287; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 5288; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 5289; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5290; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 5291; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 5292; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5293; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5294; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5295; GFX11-WGP-NEXT: buffer_gl1_inv 5296; GFX11-WGP-NEXT: buffer_gl0_inv 5297; GFX11-WGP-NEXT: s_endpgm 5298; 5299; GFX11-CU-LABEL: global_system_acq_rel_acquire_cmpxchg: 5300; GFX11-CU: ; %bb.0: ; %entry 5301; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 5302; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5303; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5304; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5305; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5306; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 5307; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 5308; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5309; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 5310; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5311; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5312; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5313; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5314; GFX11-CU-NEXT: buffer_gl1_inv 5315; GFX11-CU-NEXT: buffer_gl0_inv 5316; GFX11-CU-NEXT: s_endpgm 5317; 5318; GFX12-WGP-LABEL: global_system_acq_rel_acquire_cmpxchg: 5319; GFX12-WGP: ; %bb.0: ; %entry 5320; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 5321; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5322; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5323; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5324; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5325; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 5326; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 5327; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5328; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 5329; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 5330; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 5331; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 5332; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5333; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 5334; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 
scope:SCOPE_SYS 5335; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5336; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 5337; GFX12-WGP-NEXT: s_endpgm 5338; 5339; GFX12-CU-LABEL: global_system_acq_rel_acquire_cmpxchg: 5340; GFX12-CU: ; %bb.0: ; %entry 5341; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 5342; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5343; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5344; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5345; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5346; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 5347; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 5348; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5349; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 5350; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 5351; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 5352; GFX12-CU-NEXT: s_wait_samplecnt 0x0 5353; GFX12-CU-NEXT: s_wait_storecnt 0x0 5354; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 5355; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 5356; GFX12-CU-NEXT: s_wait_storecnt 0x0 5357; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 5358; GFX12-CU-NEXT: s_endpgm 5359 ptr addrspace(1) %out, i32 %in, i32 %old) { 5360entry: 5361 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 5362 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel acquire 5363 ret void 5364} 5365 5366define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( 5367; GFX6-LABEL: global_system_seq_cst_acquire_cmpxchg: 5368; GFX6: ; %bb.0: ; %entry 5369; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 5370; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5371; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 5372; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 5373; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5374; GFX6-NEXT: s_mov_b32 s12, s5 5375; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 5376; GFX6-NEXT: s_mov_b32 s10, 0x100f000 5377; GFX6-NEXT: s_mov_b32 s11, -1 5378; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 5379; GFX6-NEXT: 
s_mov_b32 s5, s12 5380; GFX6-NEXT: s_mov_b32 s6, s11 5381; GFX6-NEXT: s_mov_b32 s7, s10 5382; GFX6-NEXT: v_mov_b32_e32 v0, s9 5383; GFX6-NEXT: v_mov_b32_e32 v2, s8 5384; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5385; GFX6-NEXT: v_mov_b32_e32 v1, v2 5386; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5387; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5388; GFX6-NEXT: s_waitcnt vmcnt(0) 5389; GFX6-NEXT: buffer_wbinvl1 5390; GFX6-NEXT: s_endpgm 5391; 5392; GFX7-LABEL: global_system_seq_cst_acquire_cmpxchg: 5393; GFX7: ; %bb.0: ; %entry 5394; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5395; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5396; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5397; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5398; GFX7-NEXT: s_mov_b64 s[10:11], 16 5399; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5400; GFX7-NEXT: s_mov_b32 s4, s8 5401; GFX7-NEXT: s_mov_b32 s5, s9 5402; GFX7-NEXT: s_mov_b32 s9, s10 5403; GFX7-NEXT: s_mov_b32 s8, s11 5404; GFX7-NEXT: s_add_u32 s4, s4, s9 5405; GFX7-NEXT: s_addc_u32 s8, s5, s8 5406; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5407; GFX7-NEXT: s_mov_b32 s5, s8 5408; GFX7-NEXT: v_mov_b32_e32 v2, s7 5409; GFX7-NEXT: v_mov_b32_e32 v0, s6 5410; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5411; GFX7-NEXT: v_mov_b32_e32 v3, v0 5412; GFX7-NEXT: v_mov_b32_e32 v0, s4 5413; GFX7-NEXT: v_mov_b32_e32 v1, s5 5414; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5415; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5416; GFX7-NEXT: s_waitcnt vmcnt(0) 5417; GFX7-NEXT: buffer_wbinvl1_vol 5418; GFX7-NEXT: s_endpgm 5419; 5420; GFX10-WGP-LABEL: global_system_seq_cst_acquire_cmpxchg: 5421; GFX10-WGP: ; %bb.0: ; %entry 5422; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5423; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5424; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 5425; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 5426; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5427; 
GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 5428; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 5429; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5430; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 5431; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5432; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5433; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5434; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5435; GFX10-WGP-NEXT: buffer_gl1_inv 5436; GFX10-WGP-NEXT: buffer_gl0_inv 5437; GFX10-WGP-NEXT: s_endpgm 5438; 5439; GFX10-CU-LABEL: global_system_seq_cst_acquire_cmpxchg: 5440; GFX10-CU: ; %bb.0: ; %entry 5441; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5442; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5443; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 5444; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 5445; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5446; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 5447; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 5448; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5449; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 5450; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5451; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5452; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5453; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5454; GFX10-CU-NEXT: buffer_gl1_inv 5455; GFX10-CU-NEXT: buffer_gl0_inv 5456; GFX10-CU-NEXT: s_endpgm 5457; 5458; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_cmpxchg: 5459; SKIP-CACHE-INV: ; %bb.0: ; %entry 5460; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 5461; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 5462; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 5463; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 5464; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5465; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 5466; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 5467; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 5468; SKIP-CACHE-INV-NEXT: s_mov_b32 
s7, -1 5469; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 5470; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 5471; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 5472; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 5473; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 5474; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 5475; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5476; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 5477; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5478; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5479; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5480; SKIP-CACHE-INV-NEXT: s_endpgm 5481; 5482; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg: 5483; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5484; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5485; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5486; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5487; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5488; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5489; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5490; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5491; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5492; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5493; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 5494; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5495; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5496; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5497; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5498; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5499; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5500; 5501; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg: 5502; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5503; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5504; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5505; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5506; GFX90A-TGSPLIT-NEXT: 
s_load_dword s6, s[8:9], 0xc 5507; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5508; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5509; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5510; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5511; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5512; GFX90A-TGSPLIT-NEXT: buffer_wbl2 5513; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5514; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5515; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5516; GFX90A-TGSPLIT-NEXT: buffer_invl2 5517; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5518; GFX90A-TGSPLIT-NEXT: s_endpgm 5519; 5520; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg: 5521; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5522; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5523; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5524; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5525; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5526; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5527; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5528; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5529; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5530; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5531; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5532; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5533; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 5534; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5535; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 5536; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5537; 5538; GFX940-TGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg: 5539; GFX940-TGSPLIT: ; %bb.0: ; %entry 5540; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5541; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5542; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5543; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5544; GFX940-TGSPLIT-NEXT: 
s_waitcnt lgkmcnt(0) 5545; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5546; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5547; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5548; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5549; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5550; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5551; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 5552; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5553; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 5554; GFX940-TGSPLIT-NEXT: s_endpgm 5555; 5556; GFX11-WGP-LABEL: global_system_seq_cst_acquire_cmpxchg: 5557; GFX11-WGP: ; %bb.0: ; %entry 5558; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 5559; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5560; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5561; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5562; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5563; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 5564; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 5565; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5566; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 5567; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5568; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5569; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5570; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5571; GFX11-WGP-NEXT: buffer_gl1_inv 5572; GFX11-WGP-NEXT: buffer_gl0_inv 5573; GFX11-WGP-NEXT: s_endpgm 5574; 5575; GFX11-CU-LABEL: global_system_seq_cst_acquire_cmpxchg: 5576; GFX11-CU: ; %bb.0: ; %entry 5577; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 5578; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5579; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5580; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5581; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5582; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 5583; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 5584; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5585; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 5586; 
GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5587; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5588; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5589; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5590; GFX11-CU-NEXT: buffer_gl1_inv 5591; GFX11-CU-NEXT: buffer_gl0_inv 5592; GFX11-CU-NEXT: s_endpgm 5593; 5594; GFX12-WGP-LABEL: global_system_seq_cst_acquire_cmpxchg: 5595; GFX12-WGP: ; %bb.0: ; %entry 5596; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 5597; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5598; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5599; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5600; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5601; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 5602; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 5603; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5604; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 5605; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 5606; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 5607; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 5608; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5609; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 5610; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 5611; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5612; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 5613; GFX12-WGP-NEXT: s_endpgm 5614; 5615; GFX12-CU-LABEL: global_system_seq_cst_acquire_cmpxchg: 5616; GFX12-CU: ; %bb.0: ; %entry 5617; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 5618; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5619; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5620; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5621; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5622; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 5623; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 5624; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5625; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 5626; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 5627; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 5628; GFX12-CU-NEXT: s_wait_samplecnt 0x0 5629; GFX12-CU-NEXT: 
s_wait_storecnt 0x0 5630; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 5631; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 5632; GFX12-CU-NEXT: s_wait_storecnt 0x0 5633; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 5634; GFX12-CU-NEXT: s_endpgm 5635 ptr addrspace(1) %out, i32 %in, i32 %old) { 5636entry: 5637 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 5638 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst acquire 5639 ret void 5640} 5641 5642define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg( 5643; GFX6-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 5644; GFX6: ; %bb.0: ; %entry 5645; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 5646; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5647; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 5648; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 5649; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5650; GFX6-NEXT: s_mov_b32 s12, s5 5651; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 5652; GFX6-NEXT: s_mov_b32 s10, 0x100f000 5653; GFX6-NEXT: s_mov_b32 s11, -1 5654; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 5655; GFX6-NEXT: s_mov_b32 s5, s12 5656; GFX6-NEXT: s_mov_b32 s6, s11 5657; GFX6-NEXT: s_mov_b32 s7, s10 5658; GFX6-NEXT: v_mov_b32_e32 v0, s9 5659; GFX6-NEXT: v_mov_b32_e32 v2, s8 5660; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5661; GFX6-NEXT: v_mov_b32_e32 v1, v2 5662; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5663; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5664; GFX6-NEXT: s_waitcnt vmcnt(0) 5665; GFX6-NEXT: buffer_wbinvl1 5666; GFX6-NEXT: s_endpgm 5667; 5668; GFX7-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 5669; GFX7: ; %bb.0: ; %entry 5670; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5671; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5672; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5673; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5674; GFX7-NEXT: s_mov_b64 s[10:11], 16 5675; GFX7-NEXT: 
s_waitcnt lgkmcnt(0) 5676; GFX7-NEXT: s_mov_b32 s4, s8 5677; GFX7-NEXT: s_mov_b32 s5, s9 5678; GFX7-NEXT: s_mov_b32 s9, s10 5679; GFX7-NEXT: s_mov_b32 s8, s11 5680; GFX7-NEXT: s_add_u32 s4, s4, s9 5681; GFX7-NEXT: s_addc_u32 s8, s5, s8 5682; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5683; GFX7-NEXT: s_mov_b32 s5, s8 5684; GFX7-NEXT: v_mov_b32_e32 v2, s7 5685; GFX7-NEXT: v_mov_b32_e32 v0, s6 5686; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5687; GFX7-NEXT: v_mov_b32_e32 v3, v0 5688; GFX7-NEXT: v_mov_b32_e32 v0, s4 5689; GFX7-NEXT: v_mov_b32_e32 v1, s5 5690; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5691; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5692; GFX7-NEXT: s_waitcnt vmcnt(0) 5693; GFX7-NEXT: buffer_wbinvl1_vol 5694; GFX7-NEXT: s_endpgm 5695; 5696; GFX10-WGP-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 5697; GFX10-WGP: ; %bb.0: ; %entry 5698; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5699; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5700; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 5701; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 5702; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5703; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 5704; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 5705; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5706; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 5707; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5708; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5709; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5710; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5711; GFX10-WGP-NEXT: buffer_gl1_inv 5712; GFX10-WGP-NEXT: buffer_gl0_inv 5713; GFX10-WGP-NEXT: s_endpgm 5714; 5715; GFX10-CU-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 5716; GFX10-CU: ; %bb.0: ; %entry 5717; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5718; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5719; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 5720; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 5721; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5722; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 5723; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 5724; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5725; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 5726; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5727; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5728; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5729; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5730; GFX10-CU-NEXT: buffer_gl1_inv 5731; GFX10-CU-NEXT: buffer_gl0_inv 5732; GFX10-CU-NEXT: s_endpgm 5733; 5734; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 5735; SKIP-CACHE-INV: ; %bb.0: ; %entry 5736; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 5737; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 5738; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 5739; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 5740; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5741; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 5742; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 5743; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 5744; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 5745; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 5746; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 5747; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 5748; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 5749; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 5750; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 5751; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5752; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 5753; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5754; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5755; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5756; SKIP-CACHE-INV-NEXT: s_endpgm 5757; 5758; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 5759; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5760; GFX90A-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v0, 0 5761; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5762; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5763; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5764; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5765; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5766; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5767; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5768; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5769; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 5770; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5771; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5772; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5773; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5774; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5775; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5776; 5777; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 5778; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5779; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5780; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5781; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5782; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5783; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5784; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5785; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5786; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5787; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5788; GFX90A-TGSPLIT-NEXT: buffer_wbl2 5789; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5790; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5791; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5792; GFX90A-TGSPLIT-NEXT: buffer_invl2 5793; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5794; GFX90A-TGSPLIT-NEXT: s_endpgm 5795; 5796; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 5797; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5798; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5799; 
GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5800; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5801; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5802; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5803; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5804; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5805; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5806; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5807; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5808; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5809; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 5810; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 5811; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 5812; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5813; 5814; GFX940-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 5815; GFX940-TGSPLIT: ; %bb.0: ; %entry 5816; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5817; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5818; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5819; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5820; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5821; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5822; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5823; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5824; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5825; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5826; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5827; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 5828; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5829; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 5830; GFX940-TGSPLIT-NEXT: s_endpgm 5831; 5832; GFX11-WGP-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 5833; GFX11-WGP: ; %bb.0: ; %entry 5834; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 5835; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5836; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5837; 
GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5838; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5839; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 5840; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 5841; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5842; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 5843; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5844; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5845; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5846; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5847; GFX11-WGP-NEXT: buffer_gl1_inv 5848; GFX11-WGP-NEXT: buffer_gl0_inv 5849; GFX11-WGP-NEXT: s_endpgm 5850; 5851; GFX11-CU-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 5852; GFX11-CU: ; %bb.0: ; %entry 5853; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 5854; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5855; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5856; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5857; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5858; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 5859; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 5860; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5861; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 5862; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5863; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5864; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5865; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5866; GFX11-CU-NEXT: buffer_gl1_inv 5867; GFX11-CU-NEXT: buffer_gl0_inv 5868; GFX11-CU-NEXT: s_endpgm 5869; 5870; GFX12-WGP-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 5871; GFX12-WGP: ; %bb.0: ; %entry 5872; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 5873; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5874; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5875; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5876; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5877; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 5878; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 5879; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed 
$exec 5880; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 5881; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 5882; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 5883; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 5884; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5885; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 5886; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 5887; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5888; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 5889; GFX12-WGP-NEXT: s_endpgm 5890; 5891; GFX12-CU-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 5892; GFX12-CU: ; %bb.0: ; %entry 5893; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 5894; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5895; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5896; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5897; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5898; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 5899; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 5900; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5901; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 5902; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 5903; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 5904; GFX12-CU-NEXT: s_wait_samplecnt 0x0 5905; GFX12-CU-NEXT: s_wait_storecnt 0x0 5906; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 5907; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 5908; GFX12-CU-NEXT: s_wait_storecnt 0x0 5909; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 5910; GFX12-CU-NEXT: s_endpgm 5911 ptr addrspace(1) %out, i32 %in, i32 %old) { 5912entry: 5913 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 5914 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst 5915 ret void 5916} 5917 5918define amdgpu_kernel void @global_system_monotonic_monotonic_ret_cmpxchg( 5919; GFX6-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: 5920; GFX6: ; %bb.0: ; %entry 5921; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 5922; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5923; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 5924; 
GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 5925; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5926; GFX6-NEXT: s_mov_b32 s12, s5 5927; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 5928; GFX6-NEXT: s_mov_b32 s10, 0x100f000 5929; GFX6-NEXT: s_mov_b32 s11, -1 5930; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 5931; GFX6-NEXT: s_mov_b32 s5, s12 5932; GFX6-NEXT: s_mov_b32 s6, s11 5933; GFX6-NEXT: s_mov_b32 s7, s10 5934; GFX6-NEXT: v_mov_b32_e32 v0, s9 5935; GFX6-NEXT: v_mov_b32_e32 v2, s8 5936; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5937; GFX6-NEXT: v_mov_b32_e32 v1, v2 5938; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 5939; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 5940; GFX6-NEXT: s_waitcnt vmcnt(0) 5941; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 5942; GFX6-NEXT: s_endpgm 5943; 5944; GFX7-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: 5945; GFX7: ; %bb.0: ; %entry 5946; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 5947; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5948; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 5949; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 5950; GFX7-NEXT: s_mov_b64 s[12:13], 16 5951; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5952; GFX7-NEXT: s_mov_b32 s6, s4 5953; GFX7-NEXT: s_mov_b32 s7, s5 5954; GFX7-NEXT: s_mov_b32 s11, s12 5955; GFX7-NEXT: s_mov_b32 s10, s13 5956; GFX7-NEXT: s_add_u32 s6, s6, s11 5957; GFX7-NEXT: s_addc_u32 s10, s7, s10 5958; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 5959; GFX7-NEXT: s_mov_b32 s7, s10 5960; GFX7-NEXT: v_mov_b32_e32 v2, s9 5961; GFX7-NEXT: v_mov_b32_e32 v0, s8 5962; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5963; GFX7-NEXT: v_mov_b32_e32 v3, v0 5964; GFX7-NEXT: v_mov_b32_e32 v0, s6 5965; GFX7-NEXT: v_mov_b32_e32 v1, s7 5966; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 5967; GFX7-NEXT: v_mov_b32_e32 v0, s4 5968; GFX7-NEXT: v_mov_b32_e32 
v1, s5 5969; GFX7-NEXT: s_waitcnt vmcnt(0) 5970; GFX7-NEXT: flat_store_dword v[0:1], v2 5971; GFX7-NEXT: s_endpgm 5972; 5973; GFX10-WGP-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: 5974; GFX10-WGP: ; %bb.0: ; %entry 5975; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5976; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5977; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 5978; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 5979; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5980; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 5981; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 5982; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5983; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 5984; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 5985; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5986; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 5987; GFX10-WGP-NEXT: s_endpgm 5988; 5989; GFX10-CU-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: 5990; GFX10-CU: ; %bb.0: ; %entry 5991; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5992; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5993; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 5994; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 5995; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5996; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 5997; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 5998; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5999; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 6000; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6001; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6002; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 6003; GFX10-CU-NEXT: s_endpgm 6004; 6005; SKIP-CACHE-INV-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: 6006; SKIP-CACHE-INV: ; %bb.0: ; %entry 6007; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 6008; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 6009; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 6010; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 
0x3 6011; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6012; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 6013; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 6014; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 6015; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 6016; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 6017; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 6018; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 6019; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 6020; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 6021; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 6022; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6023; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 6024; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6025; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6026; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6027; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 6028; SKIP-CACHE-INV-NEXT: s_endpgm 6029; 6030; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: 6031; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6032; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6033; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6034; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6035; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6036; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6037; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6038; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6039; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6040; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6041; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6042; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6043; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6044; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6045; 6046; GFX90A-TGSPLIT-LABEL: 
global_system_monotonic_monotonic_ret_cmpxchg: 6047; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6048; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6049; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6050; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6051; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6052; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6053; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6054; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6055; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6056; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6057; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6058; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6059; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6060; GFX90A-TGSPLIT-NEXT: s_endpgm 6061; 6062; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: 6063; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6064; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6065; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6066; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6067; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6068; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6069; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6070; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6071; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6072; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6073; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 6074; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6075; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6076; GFX940-NOTTGSPLIT-NEXT: s_endpgm 6077; 6078; GFX940-TGSPLIT-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: 6079; GFX940-TGSPLIT: ; %bb.0: ; %entry 6080; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6081; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6082; GFX940-TGSPLIT-NEXT: s_load_dword 
s3, s[4:5], 0x8 6083; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6084; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6085; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6086; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6087; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6088; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6089; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 6090; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6091; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6092; GFX940-TGSPLIT-NEXT: s_endpgm 6093; 6094; GFX11-WGP-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: 6095; GFX11-WGP: ; %bb.0: ; %entry 6096; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 6097; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6098; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6099; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6100; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6101; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 6102; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 6103; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6104; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 6105; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6106; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 6107; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6108; GFX11-WGP-NEXT: s_endpgm 6109; 6110; GFX11-CU-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: 6111; GFX11-CU: ; %bb.0: ; %entry 6112; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 6113; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6114; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6115; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6116; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6117; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 6118; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 6119; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6120; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 6121; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 
glc 6122; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 6123; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6124; GFX11-CU-NEXT: s_endpgm 6125; 6126; GFX12-WGP-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: 6127; GFX12-WGP: ; %bb.0: ; %entry 6128; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 6129; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6130; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6131; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6132; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6133; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 6134; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 6135; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6136; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 6137; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 6138; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 6139; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6140; GFX12-WGP-NEXT: s_endpgm 6141; 6142; GFX12-CU-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: 6143; GFX12-CU: ; %bb.0: ; %entry 6144; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 6145; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6146; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6147; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6148; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6149; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 6150; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 6151; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6152; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 6153; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 6154; GFX12-CU-NEXT: s_wait_loadcnt 0x0 6155; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6156; GFX12-CU-NEXT: s_endpgm 6157 ptr addrspace(1) %out, i32 %in, i32 %old) { 6158entry: 6159 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 6160 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic monotonic 6161 %val0 = extractvalue { i32, i1 } %val, 0 6162 store i32 %val0, ptr 
addrspace(1) %out, align 4 6163 ret void 6164} 6165 6166define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( 6167; GFX6-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 6168; GFX6: ; %bb.0: ; %entry 6169; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 6170; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6171; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 6172; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 6173; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6174; GFX6-NEXT: s_mov_b32 s12, s5 6175; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 6176; GFX6-NEXT: s_mov_b32 s10, 0x100f000 6177; GFX6-NEXT: s_mov_b32 s11, -1 6178; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 6179; GFX6-NEXT: s_mov_b32 s5, s12 6180; GFX6-NEXT: s_mov_b32 s6, s11 6181; GFX6-NEXT: s_mov_b32 s7, s10 6182; GFX6-NEXT: v_mov_b32_e32 v0, s9 6183; GFX6-NEXT: v_mov_b32_e32 v2, s8 6184; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6185; GFX6-NEXT: v_mov_b32_e32 v1, v2 6186; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6187; GFX6-NEXT: s_waitcnt vmcnt(0) 6188; GFX6-NEXT: buffer_wbinvl1 6189; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6190; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 6191; GFX6-NEXT: s_endpgm 6192; 6193; GFX7-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 6194; GFX7: ; %bb.0: ; %entry 6195; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 6196; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6197; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 6198; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 6199; GFX7-NEXT: s_mov_b64 s[12:13], 16 6200; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6201; GFX7-NEXT: s_mov_b32 s6, s4 6202; GFX7-NEXT: s_mov_b32 s7, s5 6203; GFX7-NEXT: s_mov_b32 s11, s12 6204; GFX7-NEXT: s_mov_b32 s10, s13 6205; GFX7-NEXT: s_add_u32 s6, s6, s11 6206; GFX7-NEXT: s_addc_u32 s10, s7, s10 6207; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 6208; GFX7-NEXT: s_mov_b32 s7, s10 
6209; GFX7-NEXT: v_mov_b32_e32 v2, s9 6210; GFX7-NEXT: v_mov_b32_e32 v0, s8 6211; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6212; GFX7-NEXT: v_mov_b32_e32 v3, v0 6213; GFX7-NEXT: v_mov_b32_e32 v0, s6 6214; GFX7-NEXT: v_mov_b32_e32 v1, s7 6215; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6216; GFX7-NEXT: s_waitcnt vmcnt(0) 6217; GFX7-NEXT: buffer_wbinvl1_vol 6218; GFX7-NEXT: v_mov_b32_e32 v0, s4 6219; GFX7-NEXT: v_mov_b32_e32 v1, s5 6220; GFX7-NEXT: flat_store_dword v[0:1], v2 6221; GFX7-NEXT: s_endpgm 6222; 6223; GFX10-WGP-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 6224; GFX10-WGP: ; %bb.0: ; %entry 6225; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 6226; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6227; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 6228; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 6229; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6230; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 6231; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 6232; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6233; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 6234; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6235; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6236; GFX10-WGP-NEXT: buffer_gl1_inv 6237; GFX10-WGP-NEXT: buffer_gl0_inv 6238; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 6239; GFX10-WGP-NEXT: s_endpgm 6240; 6241; GFX10-CU-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 6242; GFX10-CU: ; %bb.0: ; %entry 6243; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 6244; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6245; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 6246; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 6247; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6248; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 6249; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 6250; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6251; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 6252; GFX10-CU-NEXT: global_atomic_cmpswap 
v1, v0, v[1:2], s[4:5] offset:16 glc 6253; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6254; GFX10-CU-NEXT: buffer_gl1_inv 6255; GFX10-CU-NEXT: buffer_gl0_inv 6256; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 6257; GFX10-CU-NEXT: s_endpgm 6258; 6259; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 6260; SKIP-CACHE-INV: ; %bb.0: ; %entry 6261; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 6262; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 6263; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 6264; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 6265; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6266; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 6267; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 6268; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 6269; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 6270; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 6271; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 6272; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 6273; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 6274; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 6275; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 6276; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6277; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 6278; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6279; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6280; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6281; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6282; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 6283; SKIP-CACHE-INV-NEXT: s_endpgm 6284; 6285; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 6286; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6287; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6288; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6289; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6290; GFX90A-NOTTGSPLIT-NEXT: 
s_load_dword s6, s[8:9], 0xc 6291; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6292; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6293; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6294; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6295; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6296; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6297; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6298; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 6299; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6300; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6301; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6302; 6303; GFX90A-TGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 6304; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6305; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6306; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6307; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6308; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6309; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6310; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6311; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6312; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6313; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6314; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6315; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6316; GFX90A-TGSPLIT-NEXT: buffer_invl2 6317; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6318; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6319; GFX90A-TGSPLIT-NEXT: s_endpgm 6320; 6321; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 6322; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6323; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6324; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6325; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6326; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6327; GFX940-NOTTGSPLIT-NEXT: s_waitcnt 
lgkmcnt(0) 6328; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6329; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6330; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6331; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6332; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 6333; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6334; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 6335; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6336; GFX940-NOTTGSPLIT-NEXT: s_endpgm 6337; 6338; GFX940-TGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 6339; GFX940-TGSPLIT: ; %bb.0: ; %entry 6340; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6341; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6342; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6343; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6344; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6345; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6346; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6347; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6348; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6349; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 6350; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6351; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 6352; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6353; GFX940-TGSPLIT-NEXT: s_endpgm 6354; 6355; GFX11-WGP-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 6356; GFX11-WGP: ; %bb.0: ; %entry 6357; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 6358; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6359; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6360; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6361; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6362; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 6363; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 6364; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6365; 
GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 6366; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6367; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 6368; GFX11-WGP-NEXT: buffer_gl1_inv 6369; GFX11-WGP-NEXT: buffer_gl0_inv 6370; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6371; GFX11-WGP-NEXT: s_endpgm 6372; 6373; GFX11-CU-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 6374; GFX11-CU: ; %bb.0: ; %entry 6375; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 6376; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6377; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6378; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6379; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6380; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 6381; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 6382; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6383; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 6384; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6385; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 6386; GFX11-CU-NEXT: buffer_gl1_inv 6387; GFX11-CU-NEXT: buffer_gl0_inv 6388; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6389; GFX11-CU-NEXT: s_endpgm 6390; 6391; GFX12-WGP-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 6392; GFX12-WGP: ; %bb.0: ; %entry 6393; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 6394; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6395; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6396; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6397; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6398; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 6399; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 6400; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6401; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 6402; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 6403; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 6404; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 6405; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6406; GFX12-WGP-NEXT: s_endpgm 
6407; 6408; GFX12-CU-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 6409; GFX12-CU: ; %bb.0: ; %entry 6410; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 6411; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6412; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6413; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6414; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6415; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 6416; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 6417; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6418; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 6419; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 6420; GFX12-CU-NEXT: s_wait_loadcnt 0x0 6421; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 6422; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6423; GFX12-CU-NEXT: s_endpgm 6424 ptr addrspace(1) %out, i32 %in, i32 %old) { 6425entry: 6426 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 6427 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire monotonic 6428 %val0 = extractvalue { i32, i1 } %val, 0 6429 store i32 %val0, ptr addrspace(1) %out, align 4 6430 ret void 6431} 6432 6433define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( 6434; GFX6-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 6435; GFX6: ; %bb.0: ; %entry 6436; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 6437; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6438; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 6439; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 6440; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6441; GFX6-NEXT: s_mov_b32 s12, s5 6442; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 6443; GFX6-NEXT: s_mov_b32 s10, 0x100f000 6444; GFX6-NEXT: s_mov_b32 s11, -1 6445; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 6446; GFX6-NEXT: s_mov_b32 s5, s12 6447; GFX6-NEXT: s_mov_b32 s6, s11 6448; GFX6-NEXT: s_mov_b32 s7, s10 6449; GFX6-NEXT: v_mov_b32_e32 v0, s9 6450; GFX6-NEXT: v_mov_b32_e32 v2, s8 
6451; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6452; GFX6-NEXT: v_mov_b32_e32 v1, v2 6453; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6454; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6455; GFX6-NEXT: s_waitcnt vmcnt(0) 6456; GFX6-NEXT: buffer_wbinvl1 6457; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6458; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 6459; GFX6-NEXT: s_endpgm 6460; 6461; GFX7-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 6462; GFX7: ; %bb.0: ; %entry 6463; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 6464; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6465; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 6466; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 6467; GFX7-NEXT: s_mov_b64 s[12:13], 16 6468; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6469; GFX7-NEXT: s_mov_b32 s6, s4 6470; GFX7-NEXT: s_mov_b32 s7, s5 6471; GFX7-NEXT: s_mov_b32 s11, s12 6472; GFX7-NEXT: s_mov_b32 s10, s13 6473; GFX7-NEXT: s_add_u32 s6, s6, s11 6474; GFX7-NEXT: s_addc_u32 s10, s7, s10 6475; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 6476; GFX7-NEXT: s_mov_b32 s7, s10 6477; GFX7-NEXT: v_mov_b32_e32 v2, s9 6478; GFX7-NEXT: v_mov_b32_e32 v0, s8 6479; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6480; GFX7-NEXT: v_mov_b32_e32 v3, v0 6481; GFX7-NEXT: v_mov_b32_e32 v0, s6 6482; GFX7-NEXT: v_mov_b32_e32 v1, s7 6483; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6484; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6485; GFX7-NEXT: s_waitcnt vmcnt(0) 6486; GFX7-NEXT: buffer_wbinvl1_vol 6487; GFX7-NEXT: v_mov_b32_e32 v0, s4 6488; GFX7-NEXT: v_mov_b32_e32 v1, s5 6489; GFX7-NEXT: flat_store_dword v[0:1], v2 6490; GFX7-NEXT: s_endpgm 6491; 6492; GFX10-WGP-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 6493; GFX10-WGP: ; %bb.0: ; %entry 6494; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 6495; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6496; GFX10-WGP-NEXT: s_load_dword 
s7, s[8:9], 0x8 6497; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 6498; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6499; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 6500; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 6501; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6502; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 6503; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6504; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6505; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6506; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6507; GFX10-WGP-NEXT: buffer_gl1_inv 6508; GFX10-WGP-NEXT: buffer_gl0_inv 6509; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 6510; GFX10-WGP-NEXT: s_endpgm 6511; 6512; GFX10-CU-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 6513; GFX10-CU: ; %bb.0: ; %entry 6514; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 6515; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6516; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 6517; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 6518; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6519; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 6520; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 6521; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6522; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 6523; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6524; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6525; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6526; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6527; GFX10-CU-NEXT: buffer_gl1_inv 6528; GFX10-CU-NEXT: buffer_gl0_inv 6529; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 6530; GFX10-CU-NEXT: s_endpgm 6531; 6532; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 6533; SKIP-CACHE-INV: ; %bb.0: ; %entry 6534; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 6535; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 6536; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 6537; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 6538; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6539; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 6540; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 6541; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 6542; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 6543; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 6544; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 6545; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 6546; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 6547; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 6548; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 6549; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6550; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 6551; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6552; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6553; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6554; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6555; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6556; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 6557; SKIP-CACHE-INV-NEXT: s_endpgm 6558; 6559; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 6560; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6561; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6562; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6563; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6564; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6565; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6566; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6567; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6568; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6569; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6570; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 6571; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6572; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6573; 
GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6574; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 6575; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6576; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6577; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6578; 6579; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 6580; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6581; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6582; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6583; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6584; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6585; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6586; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6587; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6588; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6589; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6590; GFX90A-TGSPLIT-NEXT: buffer_wbl2 6591; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6592; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6593; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6594; GFX90A-TGSPLIT-NEXT: buffer_invl2 6595; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6596; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6597; GFX90A-TGSPLIT-NEXT: s_endpgm 6598; 6599; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 6600; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6601; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6602; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6603; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6604; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6605; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6606; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6607; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6608; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6609; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6610; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 6611; 
GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6612; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 6613; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6614; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 6615; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6616; GFX940-NOTTGSPLIT-NEXT: s_endpgm 6617; 6618; GFX940-TGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 6619; GFX940-TGSPLIT: ; %bb.0: ; %entry 6620; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6621; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6622; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6623; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6624; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6625; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6626; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6627; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6628; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6629; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 6630; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6631; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 6632; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6633; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 6634; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6635; GFX940-TGSPLIT-NEXT: s_endpgm 6636; 6637; GFX11-WGP-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 6638; GFX11-WGP: ; %bb.0: ; %entry 6639; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 6640; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6641; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6642; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6643; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6644; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 6645; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 6646; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6647; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 6648; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6649; 
GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6650; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6651; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 6652; GFX11-WGP-NEXT: buffer_gl1_inv 6653; GFX11-WGP-NEXT: buffer_gl0_inv 6654; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6655; GFX11-WGP-NEXT: s_endpgm 6656; 6657; GFX11-CU-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 6658; GFX11-CU: ; %bb.0: ; %entry 6659; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 6660; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6661; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6662; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6663; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6664; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 6665; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 6666; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6667; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 6668; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6669; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6670; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6671; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 6672; GFX11-CU-NEXT: buffer_gl1_inv 6673; GFX11-CU-NEXT: buffer_gl0_inv 6674; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6675; GFX11-CU-NEXT: s_endpgm 6676; 6677; GFX12-WGP-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 6678; GFX12-WGP: ; %bb.0: ; %entry 6679; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 6680; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6681; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6682; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6683; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6684; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 6685; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 6686; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6687; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 6688; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 6689; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 6690; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 6691; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6692; 
GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 6693; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 6694; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 6695; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 6696; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 6697; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 6698; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6699; GFX12-WGP-NEXT: s_endpgm 6700; 6701; GFX12-CU-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 6702; GFX12-CU: ; %bb.0: ; %entry 6703; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 6704; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6705; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6706; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6707; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6708; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 6709; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 6710; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6711; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 6712; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 6713; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 6714; GFX12-CU-NEXT: s_wait_samplecnt 0x0 6715; GFX12-CU-NEXT: s_wait_storecnt 0x0 6716; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 6717; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 6718; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 6719; GFX12-CU-NEXT: s_wait_samplecnt 0x0 6720; GFX12-CU-NEXT: s_wait_loadcnt 0x0 6721; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 6722; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6723; GFX12-CU-NEXT: s_endpgm 6724 ptr addrspace(1) %out, i32 %in, i32 %old) { 6725entry: 6726 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 6727 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel monotonic 6728 %val0 = extractvalue { i32, i1 } %val, 0 6729 store i32 %val0, ptr addrspace(1) %out, align 4 6730 ret void 6731} 6732 6733define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( 6734; GFX6-LABEL: 
global_system_seq_cst_monotonic_ret_cmpxchg: 6735; GFX6: ; %bb.0: ; %entry 6736; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 6737; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6738; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 6739; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 6740; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6741; GFX6-NEXT: s_mov_b32 s12, s5 6742; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 6743; GFX6-NEXT: s_mov_b32 s10, 0x100f000 6744; GFX6-NEXT: s_mov_b32 s11, -1 6745; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 6746; GFX6-NEXT: s_mov_b32 s5, s12 6747; GFX6-NEXT: s_mov_b32 s6, s11 6748; GFX6-NEXT: s_mov_b32 s7, s10 6749; GFX6-NEXT: v_mov_b32_e32 v0, s9 6750; GFX6-NEXT: v_mov_b32_e32 v2, s8 6751; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6752; GFX6-NEXT: v_mov_b32_e32 v1, v2 6753; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6754; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6755; GFX6-NEXT: s_waitcnt vmcnt(0) 6756; GFX6-NEXT: buffer_wbinvl1 6757; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6758; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 6759; GFX6-NEXT: s_endpgm 6760; 6761; GFX7-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 6762; GFX7: ; %bb.0: ; %entry 6763; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 6764; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6765; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 6766; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 6767; GFX7-NEXT: s_mov_b64 s[12:13], 16 6768; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6769; GFX7-NEXT: s_mov_b32 s6, s4 6770; GFX7-NEXT: s_mov_b32 s7, s5 6771; GFX7-NEXT: s_mov_b32 s11, s12 6772; GFX7-NEXT: s_mov_b32 s10, s13 6773; GFX7-NEXT: s_add_u32 s6, s6, s11 6774; GFX7-NEXT: s_addc_u32 s10, s7, s10 6775; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 6776; GFX7-NEXT: s_mov_b32 s7, s10 6777; GFX7-NEXT: v_mov_b32_e32 v2, s9 6778; GFX7-NEXT: v_mov_b32_e32 v0, s8 6779; GFX7-NEXT: ; kill: 
def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6780; GFX7-NEXT: v_mov_b32_e32 v3, v0 6781; GFX7-NEXT: v_mov_b32_e32 v0, s6 6782; GFX7-NEXT: v_mov_b32_e32 v1, s7 6783; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6784; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6785; GFX7-NEXT: s_waitcnt vmcnt(0) 6786; GFX7-NEXT: buffer_wbinvl1_vol 6787; GFX7-NEXT: v_mov_b32_e32 v0, s4 6788; GFX7-NEXT: v_mov_b32_e32 v1, s5 6789; GFX7-NEXT: flat_store_dword v[0:1], v2 6790; GFX7-NEXT: s_endpgm 6791; 6792; GFX10-WGP-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 6793; GFX10-WGP: ; %bb.0: ; %entry 6794; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 6795; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6796; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 6797; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 6798; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6799; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 6800; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 6801; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6802; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 6803; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6804; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6805; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6806; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6807; GFX10-WGP-NEXT: buffer_gl1_inv 6808; GFX10-WGP-NEXT: buffer_gl0_inv 6809; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 6810; GFX10-WGP-NEXT: s_endpgm 6811; 6812; GFX10-CU-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 6813; GFX10-CU: ; %bb.0: ; %entry 6814; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 6815; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6816; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 6817; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 6818; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6819; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 6820; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 6821; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6822; GFX10-CU-NEXT: v_mov_b32_e32 v2, 
v3 6823; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6824; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6825; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6826; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6827; GFX10-CU-NEXT: buffer_gl1_inv 6828; GFX10-CU-NEXT: buffer_gl0_inv 6829; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 6830; GFX10-CU-NEXT: s_endpgm 6831; 6832; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 6833; SKIP-CACHE-INV: ; %bb.0: ; %entry 6834; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 6835; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 6836; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 6837; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 6838; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6839; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 6840; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 6841; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 6842; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 6843; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 6844; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 6845; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 6846; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 6847; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 6848; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 6849; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6850; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 6851; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6852; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6853; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6854; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6855; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6856; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 6857; SKIP-CACHE-INV-NEXT: s_endpgm 6858; 6859; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 6860; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 
6861; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6862; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6863; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6864; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6865; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6866; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6867; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6868; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6869; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6870; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 6871; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6872; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6873; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6874; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 6875; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6876; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6877; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6878; 6879; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 6880; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6881; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6882; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6883; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6884; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6885; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6886; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6887; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6888; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6889; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6890; GFX90A-TGSPLIT-NEXT: buffer_wbl2 6891; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6892; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6893; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6894; GFX90A-TGSPLIT-NEXT: buffer_invl2 6895; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6896; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6897; GFX90A-TGSPLIT-NEXT: s_endpgm 
6898; 6899; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 6900; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6901; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6902; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6903; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6904; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6905; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6906; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6907; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6908; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6909; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6910; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 6911; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6912; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 6913; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6914; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 6915; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6916; GFX940-NOTTGSPLIT-NEXT: s_endpgm 6917; 6918; GFX940-TGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 6919; GFX940-TGSPLIT: ; %bb.0: ; %entry 6920; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6921; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6922; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6923; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6924; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6925; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6926; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6927; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6928; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6929; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 6930; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6931; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 6932; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6933; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 6934; 
GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6935; GFX940-TGSPLIT-NEXT: s_endpgm 6936; 6937; GFX11-WGP-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 6938; GFX11-WGP: ; %bb.0: ; %entry 6939; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 6940; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6941; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6942; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6943; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6944; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 6945; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 6946; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6947; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 6948; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6949; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6950; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6951; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 6952; GFX11-WGP-NEXT: buffer_gl1_inv 6953; GFX11-WGP-NEXT: buffer_gl0_inv 6954; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6955; GFX11-WGP-NEXT: s_endpgm 6956; 6957; GFX11-CU-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 6958; GFX11-CU: ; %bb.0: ; %entry 6959; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 6960; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6961; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6962; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6963; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6964; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 6965; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 6966; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6967; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 6968; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6969; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6970; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6971; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 6972; GFX11-CU-NEXT: buffer_gl1_inv 6973; GFX11-CU-NEXT: buffer_gl0_inv 6974; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6975; GFX11-CU-NEXT: s_endpgm 6976; 6977; GFX12-WGP-LABEL: 
global_system_seq_cst_monotonic_ret_cmpxchg: 6978; GFX12-WGP: ; %bb.0: ; %entry 6979; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 6980; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6981; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6982; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6983; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6984; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 6985; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 6986; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6987; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 6988; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 6989; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 6990; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 6991; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6992; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 6993; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 6994; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 6995; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 6996; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 6997; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 6998; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6999; GFX12-WGP-NEXT: s_endpgm 7000; 7001; GFX12-CU-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 7002; GFX12-CU: ; %bb.0: ; %entry 7003; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 7004; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7005; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7006; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7007; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7008; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 7009; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 7010; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7011; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 7012; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 7013; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 7014; GFX12-CU-NEXT: s_wait_samplecnt 0x0 7015; GFX12-CU-NEXT: s_wait_storecnt 0x0 7016; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 7017; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 
7018; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 7019; GFX12-CU-NEXT: s_wait_samplecnt 0x0 7020; GFX12-CU-NEXT: s_wait_loadcnt 0x0 7021; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 7022; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7023; GFX12-CU-NEXT: s_endpgm 7024 ptr addrspace(1) %out, i32 %in, i32 %old) { 7025entry: 7026 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 7027 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst monotonic 7028 %val0 = extractvalue { i32, i1 } %val, 0 7029 store i32 %val0, ptr addrspace(1) %out, align 4 7030 ret void 7031} 7032 7033define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg( 7034; GFX6-LABEL: global_system_monotonic_acquire_ret_cmpxchg: 7035; GFX6: ; %bb.0: ; %entry 7036; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 7037; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7038; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 7039; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 7040; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7041; GFX6-NEXT: s_mov_b32 s12, s5 7042; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 7043; GFX6-NEXT: s_mov_b32 s10, 0x100f000 7044; GFX6-NEXT: s_mov_b32 s11, -1 7045; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 7046; GFX6-NEXT: s_mov_b32 s5, s12 7047; GFX6-NEXT: s_mov_b32 s6, s11 7048; GFX6-NEXT: s_mov_b32 s7, s10 7049; GFX6-NEXT: v_mov_b32_e32 v0, s9 7050; GFX6-NEXT: v_mov_b32_e32 v2, s8 7051; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7052; GFX6-NEXT: v_mov_b32_e32 v1, v2 7053; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 7054; GFX6-NEXT: s_waitcnt vmcnt(0) 7055; GFX6-NEXT: buffer_wbinvl1 7056; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7057; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 7058; GFX6-NEXT: s_endpgm 7059; 7060; GFX7-LABEL: global_system_monotonic_acquire_ret_cmpxchg: 7061; GFX7: ; %bb.0: ; %entry 7062; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 7063; 
GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7064; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7065; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7066; GFX7-NEXT: s_mov_b64 s[12:13], 16 7067; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7068; GFX7-NEXT: s_mov_b32 s6, s4 7069; GFX7-NEXT: s_mov_b32 s7, s5 7070; GFX7-NEXT: s_mov_b32 s11, s12 7071; GFX7-NEXT: s_mov_b32 s10, s13 7072; GFX7-NEXT: s_add_u32 s6, s6, s11 7073; GFX7-NEXT: s_addc_u32 s10, s7, s10 7074; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7075; GFX7-NEXT: s_mov_b32 s7, s10 7076; GFX7-NEXT: v_mov_b32_e32 v2, s9 7077; GFX7-NEXT: v_mov_b32_e32 v0, s8 7078; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7079; GFX7-NEXT: v_mov_b32_e32 v3, v0 7080; GFX7-NEXT: v_mov_b32_e32 v0, s6 7081; GFX7-NEXT: v_mov_b32_e32 v1, s7 7082; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7083; GFX7-NEXT: s_waitcnt vmcnt(0) 7084; GFX7-NEXT: buffer_wbinvl1_vol 7085; GFX7-NEXT: v_mov_b32_e32 v0, s4 7086; GFX7-NEXT: v_mov_b32_e32 v1, s5 7087; GFX7-NEXT: flat_store_dword v[0:1], v2 7088; GFX7-NEXT: s_endpgm 7089; 7090; GFX10-WGP-LABEL: global_system_monotonic_acquire_ret_cmpxchg: 7091; GFX10-WGP: ; %bb.0: ; %entry 7092; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 7093; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7094; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 7095; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 7096; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7097; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7098; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 7099; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7100; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 7101; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7102; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 7103; GFX10-WGP-NEXT: buffer_gl1_inv 7104; GFX10-WGP-NEXT: buffer_gl0_inv 7105; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 7106; GFX10-WGP-NEXT: s_endpgm 7107; 7108; GFX10-CU-LABEL: 
global_system_monotonic_acquire_ret_cmpxchg: 7109; GFX10-CU: ; %bb.0: ; %entry 7110; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 7111; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7112; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 7113; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 7114; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 7115; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7116; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 7117; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7118; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 7119; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7120; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 7121; GFX10-CU-NEXT: buffer_gl1_inv 7122; GFX10-CU-NEXT: buffer_gl0_inv 7123; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 7124; GFX10-CU-NEXT: s_endpgm 7125; 7126; SKIP-CACHE-INV-LABEL: global_system_monotonic_acquire_ret_cmpxchg: 7127; SKIP-CACHE-INV: ; %bb.0: ; %entry 7128; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7129; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7130; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7131; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7132; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7133; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 7134; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 7135; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 7136; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 7137; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 7138; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 7139; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 7140; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7141; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 7142; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 7143; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7144; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 7145; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 7146; SKIP-CACHE-INV-NEXT: s_waitcnt 
vmcnt(0) 7147; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7148; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7149; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 7150; SKIP-CACHE-INV-NEXT: s_endpgm 7151; 7152; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_acquire_ret_cmpxchg: 7153; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7154; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7155; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7156; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7157; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7158; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7159; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7160; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7161; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7162; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7163; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7164; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7165; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 7166; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 7167; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7168; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7169; 7170; GFX90A-TGSPLIT-LABEL: global_system_monotonic_acquire_ret_cmpxchg: 7171; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7172; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7173; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7174; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7175; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7176; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7177; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7178; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7179; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7180; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7181; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7182; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7183; 
GFX90A-TGSPLIT-NEXT: buffer_invl2 7184; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 7185; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7186; GFX90A-TGSPLIT-NEXT: s_endpgm 7187; 7188; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_acquire_ret_cmpxchg: 7189; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7190; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7191; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7192; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7193; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7194; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7195; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7196; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7197; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7198; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7199; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 7200; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7201; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 7202; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7203; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7204; 7205; GFX940-TGSPLIT-LABEL: global_system_monotonic_acquire_ret_cmpxchg: 7206; GFX940-TGSPLIT: ; %bb.0: ; %entry 7207; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7208; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7209; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7210; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7211; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7212; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7213; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7214; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7215; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7216; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 7217; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7218; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 7219; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 
sc0 sc1 7220; GFX940-TGSPLIT-NEXT: s_endpgm 7221; 7222; GFX11-WGP-LABEL: global_system_monotonic_acquire_ret_cmpxchg: 7223; GFX11-WGP: ; %bb.0: ; %entry 7224; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 7225; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7226; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7227; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7228; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7229; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 7230; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 7231; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7232; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 7233; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7234; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 7235; GFX11-WGP-NEXT: buffer_gl1_inv 7236; GFX11-WGP-NEXT: buffer_gl0_inv 7237; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7238; GFX11-WGP-NEXT: s_endpgm 7239; 7240; GFX11-CU-LABEL: global_system_monotonic_acquire_ret_cmpxchg: 7241; GFX11-CU: ; %bb.0: ; %entry 7242; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 7243; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7244; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7245; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7246; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7247; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 7248; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 7249; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7250; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 7251; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7252; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 7253; GFX11-CU-NEXT: buffer_gl1_inv 7254; GFX11-CU-NEXT: buffer_gl0_inv 7255; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7256; GFX11-CU-NEXT: s_endpgm 7257; 7258; GFX12-WGP-LABEL: global_system_monotonic_acquire_ret_cmpxchg: 7259; GFX12-WGP: ; %bb.0: ; %entry 7260; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 7261; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7262; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7263; GFX12-WGP-NEXT: 
s_load_b32 s2, s[4:5], 0xc 7264; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7265; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 7266; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 7267; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7268; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 7269; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 7270; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 7271; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 7272; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 7273; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 7274; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7275; GFX12-WGP-NEXT: s_endpgm 7276; 7277; GFX12-CU-LABEL: global_system_monotonic_acquire_ret_cmpxchg: 7278; GFX12-CU: ; %bb.0: ; %entry 7279; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 7280; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7281; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7282; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7283; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7284; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 7285; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 7286; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7287; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 7288; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 7289; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 7290; GFX12-CU-NEXT: s_wait_samplecnt 0x0 7291; GFX12-CU-NEXT: s_wait_loadcnt 0x0 7292; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 7293; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7294; GFX12-CU-NEXT: s_endpgm 7295 ptr addrspace(1) %out, i32 %in, i32 %old) { 7296entry: 7297 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 7298 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic acquire 7299 %val0 = extractvalue { i32, i1 } %val, 0 7300 store i32 %val0, ptr addrspace(1) %out, align 4 7301 ret void 7302} 7303 7304define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( 7305; GFX6-LABEL: 
global_system_acquire_acquire_ret_cmpxchg: 7306; GFX6: ; %bb.0: ; %entry 7307; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 7308; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7309; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 7310; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 7311; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7312; GFX6-NEXT: s_mov_b32 s12, s5 7313; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 7314; GFX6-NEXT: s_mov_b32 s10, 0x100f000 7315; GFX6-NEXT: s_mov_b32 s11, -1 7316; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 7317; GFX6-NEXT: s_mov_b32 s5, s12 7318; GFX6-NEXT: s_mov_b32 s6, s11 7319; GFX6-NEXT: s_mov_b32 s7, s10 7320; GFX6-NEXT: v_mov_b32_e32 v0, s9 7321; GFX6-NEXT: v_mov_b32_e32 v2, s8 7322; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7323; GFX6-NEXT: v_mov_b32_e32 v1, v2 7324; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 7325; GFX6-NEXT: s_waitcnt vmcnt(0) 7326; GFX6-NEXT: buffer_wbinvl1 7327; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7328; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 7329; GFX6-NEXT: s_endpgm 7330; 7331; GFX7-LABEL: global_system_acquire_acquire_ret_cmpxchg: 7332; GFX7: ; %bb.0: ; %entry 7333; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 7334; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7335; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7336; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7337; GFX7-NEXT: s_mov_b64 s[12:13], 16 7338; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7339; GFX7-NEXT: s_mov_b32 s6, s4 7340; GFX7-NEXT: s_mov_b32 s7, s5 7341; GFX7-NEXT: s_mov_b32 s11, s12 7342; GFX7-NEXT: s_mov_b32 s10, s13 7343; GFX7-NEXT: s_add_u32 s6, s6, s11 7344; GFX7-NEXT: s_addc_u32 s10, s7, s10 7345; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7346; GFX7-NEXT: s_mov_b32 s7, s10 7347; GFX7-NEXT: v_mov_b32_e32 v2, s9 7348; GFX7-NEXT: v_mov_b32_e32 v0, s8 7349; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed 
$exec 7350; GFX7-NEXT: v_mov_b32_e32 v3, v0 7351; GFX7-NEXT: v_mov_b32_e32 v0, s6 7352; GFX7-NEXT: v_mov_b32_e32 v1, s7 7353; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7354; GFX7-NEXT: s_waitcnt vmcnt(0) 7355; GFX7-NEXT: buffer_wbinvl1_vol 7356; GFX7-NEXT: v_mov_b32_e32 v0, s4 7357; GFX7-NEXT: v_mov_b32_e32 v1, s5 7358; GFX7-NEXT: flat_store_dword v[0:1], v2 7359; GFX7-NEXT: s_endpgm 7360; 7361; GFX10-WGP-LABEL: global_system_acquire_acquire_ret_cmpxchg: 7362; GFX10-WGP: ; %bb.0: ; %entry 7363; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 7364; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7365; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 7366; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 7367; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7368; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7369; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 7370; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7371; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 7372; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7373; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 7374; GFX10-WGP-NEXT: buffer_gl1_inv 7375; GFX10-WGP-NEXT: buffer_gl0_inv 7376; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 7377; GFX10-WGP-NEXT: s_endpgm 7378; 7379; GFX10-CU-LABEL: global_system_acquire_acquire_ret_cmpxchg: 7380; GFX10-CU: ; %bb.0: ; %entry 7381; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 7382; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7383; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 7384; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 7385; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 7386; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7387; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 7388; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7389; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 7390; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7391; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 7392; GFX10-CU-NEXT: buffer_gl1_inv 7393; GFX10-CU-NEXT: buffer_gl0_inv 7394; 
GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 7395; GFX10-CU-NEXT: s_endpgm 7396; 7397; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_ret_cmpxchg: 7398; SKIP-CACHE-INV: ; %bb.0: ; %entry 7399; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7400; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7401; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7402; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7403; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7404; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 7405; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 7406; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 7407; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 7408; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 7409; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 7410; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 7411; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7412; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 7413; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 7414; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7415; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 7416; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 7417; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7418; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7419; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7420; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 7421; SKIP-CACHE-INV-NEXT: s_endpgm 7422; 7423; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg: 7424; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7425; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7426; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7427; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7428; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7429; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7430; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7431; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7432; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7433; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7434; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7435; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7436; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 7437; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 7438; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7439; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7440; 7441; GFX90A-TGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg: 7442; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7443; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7444; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7445; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7446; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7447; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7448; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7449; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7450; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7451; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7452; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7453; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7454; GFX90A-TGSPLIT-NEXT: buffer_invl2 7455; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 7456; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7457; GFX90A-TGSPLIT-NEXT: s_endpgm 7458; 7459; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg: 7460; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7461; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7462; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7463; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7464; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7465; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7466; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7467; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7468; GFX940-NOTTGSPLIT-NEXT: ; 
kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7469; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7470; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 7471; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7472; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 7473; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7474; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7475; 7476; GFX940-TGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg: 7477; GFX940-TGSPLIT: ; %bb.0: ; %entry 7478; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7479; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7480; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7481; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7482; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7483; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7484; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7485; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7486; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7487; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 7488; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7489; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 7490; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7491; GFX940-TGSPLIT-NEXT: s_endpgm 7492; 7493; GFX11-WGP-LABEL: global_system_acquire_acquire_ret_cmpxchg: 7494; GFX11-WGP: ; %bb.0: ; %entry 7495; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 7496; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7497; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7498; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7499; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7500; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 7501; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 7502; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7503; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 7504; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7505; GFX11-WGP-NEXT: s_waitcnt 
vmcnt(0) 7506; GFX11-WGP-NEXT: buffer_gl1_inv 7507; GFX11-WGP-NEXT: buffer_gl0_inv 7508; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7509; GFX11-WGP-NEXT: s_endpgm 7510; 7511; GFX11-CU-LABEL: global_system_acquire_acquire_ret_cmpxchg: 7512; GFX11-CU: ; %bb.0: ; %entry 7513; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 7514; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7515; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7516; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7517; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7518; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 7519; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 7520; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7521; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 7522; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7523; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 7524; GFX11-CU-NEXT: buffer_gl1_inv 7525; GFX11-CU-NEXT: buffer_gl0_inv 7526; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7527; GFX11-CU-NEXT: s_endpgm 7528; 7529; GFX12-WGP-LABEL: global_system_acquire_acquire_ret_cmpxchg: 7530; GFX12-WGP: ; %bb.0: ; %entry 7531; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 7532; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7533; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7534; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7535; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7536; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 7537; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 7538; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7539; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 7540; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 7541; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 7542; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 7543; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7544; GFX12-WGP-NEXT: s_endpgm 7545; 7546; GFX12-CU-LABEL: global_system_acquire_acquire_ret_cmpxchg: 7547; GFX12-CU: ; %bb.0: ; %entry 7548; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 7549; 
GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7550; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7551; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7552; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7553; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 7554; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 7555; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7556; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 7557; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 7558; GFX12-CU-NEXT: s_wait_loadcnt 0x0 7559; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 7560; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7561; GFX12-CU-NEXT: s_endpgm 7562 ptr addrspace(1) %out, i32 %in, i32 %old) { 7563entry: 7564 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 7565 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire acquire 7566 %val0 = extractvalue { i32, i1 } %val, 0 7567 store i32 %val0, ptr addrspace(1) %out, align 4 7568 ret void 7569} 7570 7571define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( 7572; GFX6-LABEL: global_system_release_acquire_ret_cmpxchg: 7573; GFX6: ; %bb.0: ; %entry 7574; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 7575; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7576; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 7577; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 7578; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7579; GFX6-NEXT: s_mov_b32 s12, s5 7580; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 7581; GFX6-NEXT: s_mov_b32 s10, 0x100f000 7582; GFX6-NEXT: s_mov_b32 s11, -1 7583; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 7584; GFX6-NEXT: s_mov_b32 s5, s12 7585; GFX6-NEXT: s_mov_b32 s6, s11 7586; GFX6-NEXT: s_mov_b32 s7, s10 7587; GFX6-NEXT: v_mov_b32_e32 v0, s9 7588; GFX6-NEXT: v_mov_b32_e32 v2, s8 7589; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7590; GFX6-NEXT: v_mov_b32_e32 v1, v2 7591; GFX6-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 7592; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 7593; GFX6-NEXT: s_waitcnt vmcnt(0) 7594; GFX6-NEXT: buffer_wbinvl1 7595; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7596; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 7597; GFX6-NEXT: s_endpgm 7598; 7599; GFX7-LABEL: global_system_release_acquire_ret_cmpxchg: 7600; GFX7: ; %bb.0: ; %entry 7601; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 7602; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7603; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7604; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7605; GFX7-NEXT: s_mov_b64 s[12:13], 16 7606; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7607; GFX7-NEXT: s_mov_b32 s6, s4 7608; GFX7-NEXT: s_mov_b32 s7, s5 7609; GFX7-NEXT: s_mov_b32 s11, s12 7610; GFX7-NEXT: s_mov_b32 s10, s13 7611; GFX7-NEXT: s_add_u32 s6, s6, s11 7612; GFX7-NEXT: s_addc_u32 s10, s7, s10 7613; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7614; GFX7-NEXT: s_mov_b32 s7, s10 7615; GFX7-NEXT: v_mov_b32_e32 v2, s9 7616; GFX7-NEXT: v_mov_b32_e32 v0, s8 7617; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7618; GFX7-NEXT: v_mov_b32_e32 v3, v0 7619; GFX7-NEXT: v_mov_b32_e32 v0, s6 7620; GFX7-NEXT: v_mov_b32_e32 v1, s7 7621; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7622; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7623; GFX7-NEXT: s_waitcnt vmcnt(0) 7624; GFX7-NEXT: buffer_wbinvl1_vol 7625; GFX7-NEXT: v_mov_b32_e32 v0, s4 7626; GFX7-NEXT: v_mov_b32_e32 v1, s5 7627; GFX7-NEXT: flat_store_dword v[0:1], v2 7628; GFX7-NEXT: s_endpgm 7629; 7630; GFX10-WGP-LABEL: global_system_release_acquire_ret_cmpxchg: 7631; GFX10-WGP: ; %bb.0: ; %entry 7632; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 7633; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7634; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 7635; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 7636; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7637; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7638; 
GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 7639; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7640; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 7641; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7642; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 7643; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7644; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 7645; GFX10-WGP-NEXT: buffer_gl1_inv 7646; GFX10-WGP-NEXT: buffer_gl0_inv 7647; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 7648; GFX10-WGP-NEXT: s_endpgm 7649; 7650; GFX10-CU-LABEL: global_system_release_acquire_ret_cmpxchg: 7651; GFX10-CU: ; %bb.0: ; %entry 7652; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 7653; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7654; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 7655; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 7656; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 7657; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7658; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 7659; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7660; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 7661; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7662; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 7663; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7664; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 7665; GFX10-CU-NEXT: buffer_gl1_inv 7666; GFX10-CU-NEXT: buffer_gl0_inv 7667; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 7668; GFX10-CU-NEXT: s_endpgm 7669; 7670; SKIP-CACHE-INV-LABEL: global_system_release_acquire_ret_cmpxchg: 7671; SKIP-CACHE-INV: ; %bb.0: ; %entry 7672; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7673; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7674; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7675; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7676; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7677; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 7678; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 7679; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 7680; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 7681; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 7682; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 7683; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 7684; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7685; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 7686; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 7687; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7688; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 7689; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7690; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 7691; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7692; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7693; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7694; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 7695; SKIP-CACHE-INV-NEXT: s_endpgm 7696; 7697; GFX90A-NOTTGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg: 7698; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7699; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7700; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7701; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7702; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7703; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7704; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7705; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7706; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7707; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7708; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 7709; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7710; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7711; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7712; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 7713; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 7714; GFX90A-NOTTGSPLIT-NEXT: 
global_store_dword v0, v1, s[4:5] 7715; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7716; 7717; GFX90A-TGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg: 7718; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7719; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7720; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7721; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7722; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7723; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7724; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7725; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7726; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7727; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7728; GFX90A-TGSPLIT-NEXT: buffer_wbl2 7729; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7730; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7731; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7732; GFX90A-TGSPLIT-NEXT: buffer_invl2 7733; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 7734; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7735; GFX90A-TGSPLIT-NEXT: s_endpgm 7736; 7737; GFX940-NOTTGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg: 7738; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7739; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7740; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7741; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7742; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7743; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7744; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7745; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7746; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7747; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7748; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 7749; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7750; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 7751; GFX940-NOTTGSPLIT-NEXT: s_waitcnt 
vmcnt(0) 7752; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 7753; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7754; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7755; 7756; GFX940-TGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg: 7757; GFX940-TGSPLIT: ; %bb.0: ; %entry 7758; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7759; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7760; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7761; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7762; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7763; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7764; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7765; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7766; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7767; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 7768; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7769; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 7770; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7771; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 7772; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7773; GFX940-TGSPLIT-NEXT: s_endpgm 7774; 7775; GFX11-WGP-LABEL: global_system_release_acquire_ret_cmpxchg: 7776; GFX11-WGP: ; %bb.0: ; %entry 7777; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 7778; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7779; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7780; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7781; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7782; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 7783; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 7784; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7785; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 7786; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7787; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 7788; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7789; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 7790; GFX11-WGP-NEXT: buffer_gl1_inv 
7791; GFX11-WGP-NEXT: buffer_gl0_inv 7792; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7793; GFX11-WGP-NEXT: s_endpgm 7794; 7795; GFX11-CU-LABEL: global_system_release_acquire_ret_cmpxchg: 7796; GFX11-CU: ; %bb.0: ; %entry 7797; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 7798; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7799; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7800; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7801; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7802; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 7803; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 7804; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7805; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 7806; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7807; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 7808; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7809; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 7810; GFX11-CU-NEXT: buffer_gl1_inv 7811; GFX11-CU-NEXT: buffer_gl0_inv 7812; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7813; GFX11-CU-NEXT: s_endpgm 7814; 7815; GFX12-WGP-LABEL: global_system_release_acquire_ret_cmpxchg: 7816; GFX12-WGP: ; %bb.0: ; %entry 7817; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 7818; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7819; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7820; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7821; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7822; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 7823; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 7824; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7825; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 7826; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 7827; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 7828; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 7829; GFX12-WGP-NEXT: s_wait_storecnt 0x0 7830; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 7831; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 7832; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 7833; GFX12-WGP-NEXT: 
s_wait_samplecnt 0x0 7834; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 7835; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 7836; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7837; GFX12-WGP-NEXT: s_endpgm 7838; 7839; GFX12-CU-LABEL: global_system_release_acquire_ret_cmpxchg: 7840; GFX12-CU: ; %bb.0: ; %entry 7841; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 7842; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7843; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7844; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7845; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7846; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 7847; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 7848; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7849; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 7850; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 7851; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 7852; GFX12-CU-NEXT: s_wait_samplecnt 0x0 7853; GFX12-CU-NEXT: s_wait_storecnt 0x0 7854; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 7855; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 7856; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 7857; GFX12-CU-NEXT: s_wait_samplecnt 0x0 7858; GFX12-CU-NEXT: s_wait_loadcnt 0x0 7859; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 7860; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7861; GFX12-CU-NEXT: s_endpgm 7862 ptr addrspace(1) %out, i32 %in, i32 %old) { 7863entry: 7864 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 7865 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in release acquire 7866 %val0 = extractvalue { i32, i1 } %val, 0 7867 store i32 %val0, ptr addrspace(1) %out, align 4 7868 ret void 7869} 7870 7871define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( 7872; GFX6-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 7873; GFX6: ; %bb.0: ; %entry 7874; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 7875; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7876; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 7877; GFX6-NEXT: s_load_dword s8, s[6:7], 
0x3 7878; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7879; GFX6-NEXT: s_mov_b32 s12, s5 7880; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 7881; GFX6-NEXT: s_mov_b32 s10, 0x100f000 7882; GFX6-NEXT: s_mov_b32 s11, -1 7883; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 7884; GFX6-NEXT: s_mov_b32 s5, s12 7885; GFX6-NEXT: s_mov_b32 s6, s11 7886; GFX6-NEXT: s_mov_b32 s7, s10 7887; GFX6-NEXT: v_mov_b32_e32 v0, s9 7888; GFX6-NEXT: v_mov_b32_e32 v2, s8 7889; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7890; GFX6-NEXT: v_mov_b32_e32 v1, v2 7891; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7892; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 7893; GFX6-NEXT: s_waitcnt vmcnt(0) 7894; GFX6-NEXT: buffer_wbinvl1 7895; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7896; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 7897; GFX6-NEXT: s_endpgm 7898; 7899; GFX7-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 7900; GFX7: ; %bb.0: ; %entry 7901; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 7902; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7903; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7904; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7905; GFX7-NEXT: s_mov_b64 s[12:13], 16 7906; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7907; GFX7-NEXT: s_mov_b32 s6, s4 7908; GFX7-NEXT: s_mov_b32 s7, s5 7909; GFX7-NEXT: s_mov_b32 s11, s12 7910; GFX7-NEXT: s_mov_b32 s10, s13 7911; GFX7-NEXT: s_add_u32 s6, s6, s11 7912; GFX7-NEXT: s_addc_u32 s10, s7, s10 7913; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7914; GFX7-NEXT: s_mov_b32 s7, s10 7915; GFX7-NEXT: v_mov_b32_e32 v2, s9 7916; GFX7-NEXT: v_mov_b32_e32 v0, s8 7917; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7918; GFX7-NEXT: v_mov_b32_e32 v3, v0 7919; GFX7-NEXT: v_mov_b32_e32 v0, s6 7920; GFX7-NEXT: v_mov_b32_e32 v1, s7 7921; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7922; GFX7-NEXT: flat_atomic_cmpswap v2, 
v[0:1], v[2:3] glc 7923; GFX7-NEXT: s_waitcnt vmcnt(0) 7924; GFX7-NEXT: buffer_wbinvl1_vol 7925; GFX7-NEXT: v_mov_b32_e32 v0, s4 7926; GFX7-NEXT: v_mov_b32_e32 v1, s5 7927; GFX7-NEXT: flat_store_dword v[0:1], v2 7928; GFX7-NEXT: s_endpgm 7929; 7930; GFX10-WGP-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 7931; GFX10-WGP: ; %bb.0: ; %entry 7932; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 7933; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7934; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 7935; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 7936; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7937; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7938; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 7939; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7940; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 7941; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7942; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 7943; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7944; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 7945; GFX10-WGP-NEXT: buffer_gl1_inv 7946; GFX10-WGP-NEXT: buffer_gl0_inv 7947; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 7948; GFX10-WGP-NEXT: s_endpgm 7949; 7950; GFX10-CU-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 7951; GFX10-CU: ; %bb.0: ; %entry 7952; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 7953; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7954; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 7955; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 7956; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 7957; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7958; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 7959; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7960; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 7961; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7962; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 7963; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7964; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 7965; GFX10-CU-NEXT: buffer_gl1_inv 7966; 
GFX10-CU-NEXT: buffer_gl0_inv 7967; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 7968; GFX10-CU-NEXT: s_endpgm 7969; 7970; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 7971; SKIP-CACHE-INV: ; %bb.0: ; %entry 7972; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7973; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7974; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7975; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7976; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7977; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 7978; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 7979; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 7980; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 7981; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 7982; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 7983; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 7984; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7985; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 7986; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 7987; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7988; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 7989; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7990; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 7991; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7992; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7993; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7994; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 7995; SKIP-CACHE-INV-NEXT: s_endpgm 7996; 7997; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 7998; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7999; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8000; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8001; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8002; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8003; GFX90A-NOTTGSPLIT-NEXT: 
s_waitcnt lgkmcnt(0) 8004; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8005; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8006; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8007; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8008; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 8009; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8010; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8011; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8012; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 8013; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 8014; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8015; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 8016; 8017; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 8018; GFX90A-TGSPLIT: ; %bb.0: ; %entry 8019; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8020; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8021; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8022; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8023; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8024; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8025; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8026; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8027; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8028; GFX90A-TGSPLIT-NEXT: buffer_wbl2 8029; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8030; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8031; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8032; GFX90A-TGSPLIT-NEXT: buffer_invl2 8033; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 8034; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8035; GFX90A-TGSPLIT-NEXT: s_endpgm 8036; 8037; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 8038; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 8039; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8040; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8041; 
GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8042; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8043; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8044; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8045; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8046; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8047; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8048; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 8049; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8050; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 8051; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8052; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 8053; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8054; GFX940-NOTTGSPLIT-NEXT: s_endpgm 8055; 8056; GFX940-TGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 8057; GFX940-TGSPLIT: ; %bb.0: ; %entry 8058; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8059; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8060; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8061; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8062; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8063; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8064; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8065; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8066; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8067; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 8068; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8069; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 8070; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8071; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 8072; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8073; GFX940-TGSPLIT-NEXT: s_endpgm 8074; 8075; GFX11-WGP-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 8076; GFX11-WGP: ; %bb.0: ; %entry 8077; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 8078; 
GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8079; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8080; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8081; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 8082; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 8083; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 8084; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8085; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 8086; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8087; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 8088; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8089; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 8090; GFX11-WGP-NEXT: buffer_gl1_inv 8091; GFX11-WGP-NEXT: buffer_gl0_inv 8092; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8093; GFX11-WGP-NEXT: s_endpgm 8094; 8095; GFX11-CU-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 8096; GFX11-CU: ; %bb.0: ; %entry 8097; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 8098; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8099; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8100; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8101; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 8102; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 8103; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 8104; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8105; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 8106; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8107; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 8108; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8109; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 8110; GFX11-CU-NEXT: buffer_gl1_inv 8111; GFX11-CU-NEXT: buffer_gl0_inv 8112; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8113; GFX11-CU-NEXT: s_endpgm 8114; 8115; GFX12-WGP-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 8116; GFX12-WGP: ; %bb.0: ; %entry 8117; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 8118; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8119; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8120; GFX12-WGP-NEXT: s_load_b32 s2, 
s[4:5], 0xc 8121; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 8122; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 8123; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 8124; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8125; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 8126; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 8127; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8128; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 8129; GFX12-WGP-NEXT: s_wait_storecnt 0x0 8130; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 8131; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8132; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8133; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 8134; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 8135; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 8136; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8137; GFX12-WGP-NEXT: s_endpgm 8138; 8139; GFX12-CU-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 8140; GFX12-CU: ; %bb.0: ; %entry 8141; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 8142; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8143; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8144; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8145; GFX12-CU-NEXT: s_wait_kmcnt 0x0 8146; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 8147; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 8148; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8149; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 8150; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 8151; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 8152; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8153; GFX12-CU-NEXT: s_wait_storecnt 0x0 8154; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 8155; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8156; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 8157; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8158; GFX12-CU-NEXT: s_wait_loadcnt 0x0 8159; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 8160; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8161; GFX12-CU-NEXT: s_endpgm 8162 ptr 
addrspace(1) %out, i32 %in, i32 %old) { 8163entry: 8164 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 8165 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel acquire 8166 %val0 = extractvalue { i32, i1 } %val, 0 8167 store i32 %val0, ptr addrspace(1) %out, align 4 8168 ret void 8169} 8170
; Test: value-returning volatile cmpxchg on a global (addrspace(1)) pointer with
; seq_cst success ordering and acquire failure ordering, and no explicit
; syncscope. The exchange targets %out advanced by 4 i32 elements (this is the
; "offset:16" seen in the checks below); field 0 of the cmpxchg result (the
; loaded value) is then stored back to %out.
; The per-target CHECK lines are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing them by hand.
8171define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( 8172; GFX6-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 8173; GFX6: ; %bb.0: ; %entry 8174; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 8175; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8176; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 8177; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 8178; GFX6-NEXT: s_waitcnt lgkmcnt(0) 8179; GFX6-NEXT: s_mov_b32 s12, s5 8180; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 8181; GFX6-NEXT: s_mov_b32 s10, 0x100f000 8182; GFX6-NEXT: s_mov_b32 s11, -1 8183; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 8184; GFX6-NEXT: s_mov_b32 s5, s12 8185; GFX6-NEXT: s_mov_b32 s6, s11 8186; GFX6-NEXT: s_mov_b32 s7, s10 8187; GFX6-NEXT: v_mov_b32_e32 v0, s9 8188; GFX6-NEXT: v_mov_b32_e32 v2, s8 8189; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8190; GFX6-NEXT: v_mov_b32_e32 v1, v2 8191; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8192; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 8193; GFX6-NEXT: s_waitcnt vmcnt(0) 8194; GFX6-NEXT: buffer_wbinvl1 8195; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8196; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 8197; GFX6-NEXT: s_endpgm 8198; 8199; GFX7-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 8200; GFX7: ; %bb.0: ; %entry 8201; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 8202; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8203; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 8204; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 8205; GFX7-NEXT: s_mov_b64 s[12:13], 16 8206; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8207; 
GFX7-NEXT: s_mov_b32 s6, s4 8208; GFX7-NEXT: s_mov_b32 s7, s5 8209; GFX7-NEXT: s_mov_b32 s11, s12 8210; GFX7-NEXT: s_mov_b32 s10, s13 8211; GFX7-NEXT: s_add_u32 s6, s6, s11 8212; GFX7-NEXT: s_addc_u32 s10, s7, s10 8213; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8214; GFX7-NEXT: s_mov_b32 s7, s10 8215; GFX7-NEXT: v_mov_b32_e32 v2, s9 8216; GFX7-NEXT: v_mov_b32_e32 v0, s8 8217; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8218; GFX7-NEXT: v_mov_b32_e32 v3, v0 8219; GFX7-NEXT: v_mov_b32_e32 v0, s6 8220; GFX7-NEXT: v_mov_b32_e32 v1, s7 8221; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8222; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8223; GFX7-NEXT: s_waitcnt vmcnt(0) 8224; GFX7-NEXT: buffer_wbinvl1_vol 8225; GFX7-NEXT: v_mov_b32_e32 v0, s4 8226; GFX7-NEXT: v_mov_b32_e32 v1, s5 8227; GFX7-NEXT: flat_store_dword v[0:1], v2 8228; GFX7-NEXT: s_endpgm 8229; 8230; GFX10-WGP-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 8231; GFX10-WGP: ; %bb.0: ; %entry 8232; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 8233; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8234; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 8235; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 8236; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 8237; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 8238; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 8239; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8240; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 8241; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8242; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 8243; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8244; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 8245; GFX10-WGP-NEXT: buffer_gl1_inv 8246; GFX10-WGP-NEXT: buffer_gl0_inv 8247; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 8248; GFX10-WGP-NEXT: s_endpgm 8249; 8250; GFX10-CU-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 8251; GFX10-CU: ; %bb.0: ; %entry 8252; GFX10-CU-NEXT: v_mov_b32_e32 
v0, 0 8253; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8254; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 8255; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 8256; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 8257; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 8258; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 8259; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8260; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 8261; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8262; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 8263; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8264; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 8265; GFX10-CU-NEXT: buffer_gl1_inv 8266; GFX10-CU-NEXT: buffer_gl0_inv 8267; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 8268; GFX10-CU-NEXT: s_endpgm 8269; 8270; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 8271; SKIP-CACHE-INV: ; %bb.0: ; %entry 8272; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 8273; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 8274; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 8275; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 8276; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 8277; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 8278; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 8279; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 8280; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 8281; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 8282; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 8283; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 8284; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 8285; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 8286; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 8287; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8288; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 8289; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8290; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 8291; 
SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8292; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8293; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8294; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 8295; SKIP-CACHE-INV-NEXT: s_endpgm 8296; 8297; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 8298; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 8299; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8300; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8301; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8302; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8303; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8304; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8305; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8306; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8307; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8308; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 8309; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8310; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8311; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8312; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 8313; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 8314; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8315; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 8316; 8317; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 8318; GFX90A-TGSPLIT: ; %bb.0: ; %entry 8319; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8320; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8321; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8322; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8323; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8324; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8325; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8326; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8327; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8328; 
GFX90A-TGSPLIT-NEXT: buffer_wbl2 8329; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8330; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8331; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8332; GFX90A-TGSPLIT-NEXT: buffer_invl2 8333; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 8334; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8335; GFX90A-TGSPLIT-NEXT: s_endpgm 8336; 8337; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 8338; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 8339; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8340; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8341; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8342; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8343; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8344; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8345; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8346; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8347; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8348; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 8349; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8350; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 8351; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8352; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 8353; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8354; GFX940-NOTTGSPLIT-NEXT: s_endpgm 8355; 8356; GFX940-TGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 8357; GFX940-TGSPLIT: ; %bb.0: ; %entry 8358; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8359; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8360; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8361; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8362; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8363; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8364; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8365; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 
killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8366; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8367; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 8368; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8369; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 8370; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8371; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 8372; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8373; GFX940-TGSPLIT-NEXT: s_endpgm 8374; 8375; GFX11-WGP-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 8376; GFX11-WGP: ; %bb.0: ; %entry 8377; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 8378; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8379; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8380; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8381; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 8382; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 8383; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 8384; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8385; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 8386; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8387; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 8388; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8389; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 8390; GFX11-WGP-NEXT: buffer_gl1_inv 8391; GFX11-WGP-NEXT: buffer_gl0_inv 8392; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8393; GFX11-WGP-NEXT: s_endpgm 8394; 8395; GFX11-CU-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 8396; GFX11-CU: ; %bb.0: ; %entry 8397; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 8398; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8399; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8400; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8401; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 8402; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 8403; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 8404; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8405; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 8406; GFX11-CU-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 8407; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 8408; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8409; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 8410; GFX11-CU-NEXT: buffer_gl1_inv 8411; GFX11-CU-NEXT: buffer_gl0_inv 8412; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8413; GFX11-CU-NEXT: s_endpgm 8414; 8415; GFX12-WGP-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 8416; GFX12-WGP: ; %bb.0: ; %entry 8417; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 8418; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8419; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8420; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8421; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 8422; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 8423; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 8424; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8425; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 8426; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 8427; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8428; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 8429; GFX12-WGP-NEXT: s_wait_storecnt 0x0 8430; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 8431; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8432; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8433; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 8434; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 8435; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 8436; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8437; GFX12-WGP-NEXT: s_endpgm 8438; 8439; GFX12-CU-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 8440; GFX12-CU: ; %bb.0: ; %entry 8441; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 8442; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8443; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8444; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8445; GFX12-CU-NEXT: s_wait_kmcnt 0x0 8446; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 8447; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 8448; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed 
$exec 8449; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 8450; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 8451; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 8452; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8453; GFX12-CU-NEXT: s_wait_storecnt 0x0 8454; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 8455; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8456; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 8457; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8458; GFX12-CU-NEXT: s_wait_loadcnt 0x0 8459; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 8460; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8461; GFX12-CU-NEXT: s_endpgm 8462 ptr addrspace(1) %out, i32 %in, i32 %old) { 8463entry: 8464 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 8465 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst acquire 8466 %val0 = extractvalue { i32, i1 } %val, 0 8467 store i32 %val0, ptr addrspace(1) %out, align 4 8468 ret void 8469} 8470 8471define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg( 8472; GFX6-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: 8473; GFX6: ; %bb.0: ; %entry 8474; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 8475; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8476; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 8477; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 8478; GFX6-NEXT: s_waitcnt lgkmcnt(0) 8479; GFX6-NEXT: s_mov_b32 s12, s5 8480; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 8481; GFX6-NEXT: s_mov_b32 s10, 0x100f000 8482; GFX6-NEXT: s_mov_b32 s11, -1 8483; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 8484; GFX6-NEXT: s_mov_b32 s5, s12 8485; GFX6-NEXT: s_mov_b32 s6, s11 8486; GFX6-NEXT: s_mov_b32 s7, s10 8487; GFX6-NEXT: v_mov_b32_e32 v0, s9 8488; GFX6-NEXT: v_mov_b32_e32 v2, s8 8489; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8490; GFX6-NEXT: v_mov_b32_e32 v1, v2 8491; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8492; GFX6-NEXT: buffer_atomic_cmpswap 
v[0:1], off, s[4:7], 0 offset:16 glc 8493; GFX6-NEXT: s_waitcnt vmcnt(0) 8494; GFX6-NEXT: buffer_wbinvl1 8495; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8496; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 8497; GFX6-NEXT: s_endpgm 8498; 8499; GFX7-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: 8500; GFX7: ; %bb.0: ; %entry 8501; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 8502; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8503; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 8504; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 8505; GFX7-NEXT: s_mov_b64 s[12:13], 16 8506; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8507; GFX7-NEXT: s_mov_b32 s6, s4 8508; GFX7-NEXT: s_mov_b32 s7, s5 8509; GFX7-NEXT: s_mov_b32 s11, s12 8510; GFX7-NEXT: s_mov_b32 s10, s13 8511; GFX7-NEXT: s_add_u32 s6, s6, s11 8512; GFX7-NEXT: s_addc_u32 s10, s7, s10 8513; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8514; GFX7-NEXT: s_mov_b32 s7, s10 8515; GFX7-NEXT: v_mov_b32_e32 v2, s9 8516; GFX7-NEXT: v_mov_b32_e32 v0, s8 8517; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8518; GFX7-NEXT: v_mov_b32_e32 v3, v0 8519; GFX7-NEXT: v_mov_b32_e32 v0, s6 8520; GFX7-NEXT: v_mov_b32_e32 v1, s7 8521; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8522; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8523; GFX7-NEXT: s_waitcnt vmcnt(0) 8524; GFX7-NEXT: buffer_wbinvl1_vol 8525; GFX7-NEXT: v_mov_b32_e32 v0, s4 8526; GFX7-NEXT: v_mov_b32_e32 v1, s5 8527; GFX7-NEXT: flat_store_dword v[0:1], v2 8528; GFX7-NEXT: s_endpgm 8529; 8530; GFX10-WGP-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: 8531; GFX10-WGP: ; %bb.0: ; %entry 8532; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 8533; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8534; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 8535; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 8536; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 8537; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 8538; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 8539; 
GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8540; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 8541; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8542; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 8543; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8544; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 8545; GFX10-WGP-NEXT: buffer_gl1_inv 8546; GFX10-WGP-NEXT: buffer_gl0_inv 8547; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 8548; GFX10-WGP-NEXT: s_endpgm 8549; 8550; GFX10-CU-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: 8551; GFX10-CU: ; %bb.0: ; %entry 8552; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 8553; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8554; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 8555; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 8556; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 8557; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 8558; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 8559; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8560; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 8561; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8562; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 8563; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8564; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 8565; GFX10-CU-NEXT: buffer_gl1_inv 8566; GFX10-CU-NEXT: buffer_gl0_inv 8567; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 8568; GFX10-CU-NEXT: s_endpgm 8569; 8570; SKIP-CACHE-INV-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: 8571; SKIP-CACHE-INV: ; %bb.0: ; %entry 8572; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 8573; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 8574; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 8575; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 8576; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 8577; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 8578; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 8579; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 
0xf000 8580; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 8581; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 8582; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 8583; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 8584; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 8585; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 8586; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 8587; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8588; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 8589; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8590; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 8591; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8592; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8593; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8594; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 8595; SKIP-CACHE-INV-NEXT: s_endpgm 8596; 8597; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: 8598; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 8599; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8600; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8601; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8602; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8603; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8604; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8605; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8606; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8607; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8608; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 8609; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8610; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8611; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8612; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 8613; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 8614; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8615; 
GFX90A-NOTTGSPLIT-NEXT: s_endpgm 8616; 8617; GFX90A-TGSPLIT-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: 8618; GFX90A-TGSPLIT: ; %bb.0: ; %entry 8619; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8620; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8621; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8622; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8623; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8624; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8625; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8626; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8627; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8628; GFX90A-TGSPLIT-NEXT: buffer_wbl2 8629; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8630; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8631; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8632; GFX90A-TGSPLIT-NEXT: buffer_invl2 8633; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 8634; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8635; GFX90A-TGSPLIT-NEXT: s_endpgm 8636; 8637; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: 8638; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 8639; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8640; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8641; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8642; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8643; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8644; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8645; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8646; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8647; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8648; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 8649; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8650; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 8651; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8652; 
GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 8653; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8654; GFX940-NOTTGSPLIT-NEXT: s_endpgm 8655; 8656; GFX940-TGSPLIT-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: 8657; GFX940-TGSPLIT: ; %bb.0: ; %entry 8658; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8659; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8660; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8661; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8662; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8663; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8664; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8665; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8666; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8667; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 8668; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8669; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 8670; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8671; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 8672; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8673; GFX940-TGSPLIT-NEXT: s_endpgm 8674; 8675; GFX11-WGP-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: 8676; GFX11-WGP: ; %bb.0: ; %entry 8677; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 8678; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8679; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8680; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8681; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 8682; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 8683; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 8684; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8685; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 8686; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8687; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 8688; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8689; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 8690; GFX11-WGP-NEXT: buffer_gl1_inv 8691; 
GFX11-WGP-NEXT: buffer_gl0_inv 8692; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8693; GFX11-WGP-NEXT: s_endpgm 8694; 8695; GFX11-CU-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: 8696; GFX11-CU: ; %bb.0: ; %entry 8697; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 8698; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8699; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8700; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8701; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 8702; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 8703; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 8704; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8705; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 8706; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8707; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 8708; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8709; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 8710; GFX11-CU-NEXT: buffer_gl1_inv 8711; GFX11-CU-NEXT: buffer_gl0_inv 8712; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8713; GFX11-CU-NEXT: s_endpgm 8714; 8715; GFX12-WGP-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: 8716; GFX12-WGP: ; %bb.0: ; %entry 8717; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 8718; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8719; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8720; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8721; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 8722; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 8723; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 8724; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8725; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 8726; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 8727; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8728; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 8729; GFX12-WGP-NEXT: s_wait_storecnt 0x0 8730; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 8731; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8732; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8733; GFX12-WGP-NEXT: 
s_wait_samplecnt 0x0 8734; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 8735; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 8736; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8737; GFX12-WGP-NEXT: s_endpgm 8738; 8739; GFX12-CU-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: 8740; GFX12-CU: ; %bb.0: ; %entry 8741; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 8742; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8743; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8744; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8745; GFX12-CU-NEXT: s_wait_kmcnt 0x0 8746; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 8747; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 8748; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8749; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 8750; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 8751; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 8752; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8753; GFX12-CU-NEXT: s_wait_storecnt 0x0 8754; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 8755; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8756; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 8757; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8758; GFX12-CU-NEXT: s_wait_loadcnt 0x0 8759; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 8760; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8761; GFX12-CU-NEXT: s_endpgm 8762 ptr addrspace(1) %out, i32 %in, i32 %old) { 8763entry: 8764 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 8765 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic seq_cst 8766 %val0 = extractvalue { i32, i1 } %val, 0 8767 store i32 %val0, ptr addrspace(1) %out, align 4 8768 ret void 8769} 8770 8771define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg( 8772; GFX6-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: 8773; GFX6: ; %bb.0: ; %entry 8774; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 8775; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8776; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 8777; GFX6-NEXT: s_load_dword s8, 
s[6:7], 0x3 8778; GFX6-NEXT: s_waitcnt lgkmcnt(0) 8779; GFX6-NEXT: s_mov_b32 s12, s5 8780; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 8781; GFX6-NEXT: s_mov_b32 s10, 0x100f000 8782; GFX6-NEXT: s_mov_b32 s11, -1 8783; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 8784; GFX6-NEXT: s_mov_b32 s5, s12 8785; GFX6-NEXT: s_mov_b32 s6, s11 8786; GFX6-NEXT: s_mov_b32 s7, s10 8787; GFX6-NEXT: v_mov_b32_e32 v0, s9 8788; GFX6-NEXT: v_mov_b32_e32 v2, s8 8789; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8790; GFX6-NEXT: v_mov_b32_e32 v1, v2 8791; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8792; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 8793; GFX6-NEXT: s_waitcnt vmcnt(0) 8794; GFX6-NEXT: buffer_wbinvl1 8795; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8796; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 8797; GFX6-NEXT: s_endpgm 8798; 8799; GFX7-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: 8800; GFX7: ; %bb.0: ; %entry 8801; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 8802; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8803; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 8804; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 8805; GFX7-NEXT: s_mov_b64 s[12:13], 16 8806; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8807; GFX7-NEXT: s_mov_b32 s6, s4 8808; GFX7-NEXT: s_mov_b32 s7, s5 8809; GFX7-NEXT: s_mov_b32 s11, s12 8810; GFX7-NEXT: s_mov_b32 s10, s13 8811; GFX7-NEXT: s_add_u32 s6, s6, s11 8812; GFX7-NEXT: s_addc_u32 s10, s7, s10 8813; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8814; GFX7-NEXT: s_mov_b32 s7, s10 8815; GFX7-NEXT: v_mov_b32_e32 v2, s9 8816; GFX7-NEXT: v_mov_b32_e32 v0, s8 8817; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8818; GFX7-NEXT: v_mov_b32_e32 v3, v0 8819; GFX7-NEXT: v_mov_b32_e32 v0, s6 8820; GFX7-NEXT: v_mov_b32_e32 v1, s7 8821; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8822; GFX7-NEXT: 
flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8823; GFX7-NEXT: s_waitcnt vmcnt(0) 8824; GFX7-NEXT: buffer_wbinvl1_vol 8825; GFX7-NEXT: v_mov_b32_e32 v0, s4 8826; GFX7-NEXT: v_mov_b32_e32 v1, s5 8827; GFX7-NEXT: flat_store_dword v[0:1], v2 8828; GFX7-NEXT: s_endpgm 8829; 8830; GFX10-WGP-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: 8831; GFX10-WGP: ; %bb.0: ; %entry 8832; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 8833; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8834; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 8835; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 8836; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 8837; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 8838; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 8839; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8840; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 8841; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8842; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 8843; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8844; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 8845; GFX10-WGP-NEXT: buffer_gl1_inv 8846; GFX10-WGP-NEXT: buffer_gl0_inv 8847; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 8848; GFX10-WGP-NEXT: s_endpgm 8849; 8850; GFX10-CU-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: 8851; GFX10-CU: ; %bb.0: ; %entry 8852; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 8853; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8854; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 8855; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 8856; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 8857; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 8858; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 8859; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8860; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 8861; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8862; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 8863; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8864; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 8865; 
GFX10-CU-NEXT: buffer_gl1_inv 8866; GFX10-CU-NEXT: buffer_gl0_inv 8867; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 8868; GFX10-CU-NEXT: s_endpgm 8869; 8870; SKIP-CACHE-INV-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: 8871; SKIP-CACHE-INV: ; %bb.0: ; %entry 8872; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 8873; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 8874; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 8875; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 8876; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 8877; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 8878; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 8879; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 8880; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 8881; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 8882; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 8883; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 8884; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 8885; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 8886; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 8887; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8888; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 8889; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8890; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 8891; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8892; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8893; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8894; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 8895; SKIP-CACHE-INV-NEXT: s_endpgm 8896; 8897; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: 8898; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 8899; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8900; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8901; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8902; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 
8903; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8904; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8905; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8906; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8907; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8908; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 8909; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8910; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8911; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8912; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 8913; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 8914; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8915; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 8916; 8917; GFX90A-TGSPLIT-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: 8918; GFX90A-TGSPLIT: ; %bb.0: ; %entry 8919; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8920; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8921; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8922; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8923; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8924; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8925; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8926; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8927; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8928; GFX90A-TGSPLIT-NEXT: buffer_wbl2 8929; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8930; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8931; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8932; GFX90A-TGSPLIT-NEXT: buffer_invl2 8933; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 8934; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8935; GFX90A-TGSPLIT-NEXT: s_endpgm 8936; 8937; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: 8938; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 8939; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8940; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 
0x0 8941; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8942; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8943; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8944; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8945; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8946; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8947; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8948; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 8949; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8950; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 8951; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8952; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 8953; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8954; GFX940-NOTTGSPLIT-NEXT: s_endpgm 8955; 8956; GFX940-TGSPLIT-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: 8957; GFX940-TGSPLIT: ; %bb.0: ; %entry 8958; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8959; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8960; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8961; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8962; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8963; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8964; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8965; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8966; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8967; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 8968; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8969; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 8970; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8971; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 8972; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8973; GFX940-TGSPLIT-NEXT: s_endpgm 8974; 8975; GFX11-WGP-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: 8976; GFX11-WGP: ; %bb.0: ; %entry 8977; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 8978; 
GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8979; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8980; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8981; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 8982; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 8983; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 8984; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8985; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 8986; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8987; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 8988; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8989; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 8990; GFX11-WGP-NEXT: buffer_gl1_inv 8991; GFX11-WGP-NEXT: buffer_gl0_inv 8992; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8993; GFX11-WGP-NEXT: s_endpgm 8994; 8995; GFX11-CU-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: 8996; GFX11-CU: ; %bb.0: ; %entry 8997; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 8998; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8999; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9000; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9001; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9002; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 9003; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 9004; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9005; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 9006; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9007; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 9008; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9009; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 9010; GFX11-CU-NEXT: buffer_gl1_inv 9011; GFX11-CU-NEXT: buffer_gl0_inv 9012; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9013; GFX11-CU-NEXT: s_endpgm 9014; 9015; GFX12-WGP-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: 9016; GFX12-WGP: ; %bb.0: ; %entry 9017; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 9018; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9019; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9020; GFX12-WGP-NEXT: s_load_b32 s2, 
s[4:5], 0xc 9021; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9022; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 9023; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 9024; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9025; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 9026; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 9027; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9028; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9029; GFX12-WGP-NEXT: s_wait_storecnt 0x0 9030; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 9031; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9032; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 9033; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 9034; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9035; GFX12-WGP-NEXT: s_endpgm 9036; 9037; GFX12-CU-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: 9038; GFX12-CU: ; %bb.0: ; %entry 9039; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 9040; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9041; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9042; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9043; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9044; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 9045; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 9046; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9047; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 9048; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 9049; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9050; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9051; GFX12-CU-NEXT: s_wait_storecnt 0x0 9052; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 9053; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9054; GFX12-CU-NEXT: s_wait_loadcnt 0x0 9055; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 9056; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9057; GFX12-CU-NEXT: s_endpgm 9058 ptr addrspace(1) %out, i32 %in, i32 %old) { 9059entry: 9060 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 9061 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 
%old, i32 %in acquire seq_cst 9062 %val0 = extractvalue { i32, i1 } %val, 0 9063 store i32 %val0, ptr addrspace(1) %out, align 4 9064 ret void 9065} 9066 9067define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg( 9068; GFX6-LABEL: global_system_relese_seq_cst_ret_cmpxchg: 9069; GFX6: ; %bb.0: ; %entry 9070; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 9071; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9072; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 9073; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 9074; GFX6-NEXT: s_waitcnt lgkmcnt(0) 9075; GFX6-NEXT: s_mov_b32 s12, s5 9076; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 9077; GFX6-NEXT: s_mov_b32 s10, 0x100f000 9078; GFX6-NEXT: s_mov_b32 s11, -1 9079; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9080; GFX6-NEXT: s_mov_b32 s5, s12 9081; GFX6-NEXT: s_mov_b32 s6, s11 9082; GFX6-NEXT: s_mov_b32 s7, s10 9083; GFX6-NEXT: v_mov_b32_e32 v0, s9 9084; GFX6-NEXT: v_mov_b32_e32 v2, s8 9085; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9086; GFX6-NEXT: v_mov_b32_e32 v1, v2 9087; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9088; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 9089; GFX6-NEXT: s_waitcnt vmcnt(0) 9090; GFX6-NEXT: buffer_wbinvl1 9091; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9092; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 9093; GFX6-NEXT: s_endpgm 9094; 9095; GFX7-LABEL: global_system_relese_seq_cst_ret_cmpxchg: 9096; GFX7: ; %bb.0: ; %entry 9097; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 9098; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9099; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 9100; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 9101; GFX7-NEXT: s_mov_b64 s[12:13], 16 9102; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9103; GFX7-NEXT: s_mov_b32 s6, s4 9104; GFX7-NEXT: s_mov_b32 s7, s5 9105; GFX7-NEXT: s_mov_b32 s11, s12 9106; GFX7-NEXT: s_mov_b32 s10, s13 9107; GFX7-NEXT: s_add_u32 s6, s6, s11 
9108; GFX7-NEXT: s_addc_u32 s10, s7, s10 9109; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9110; GFX7-NEXT: s_mov_b32 s7, s10 9111; GFX7-NEXT: v_mov_b32_e32 v2, s9 9112; GFX7-NEXT: v_mov_b32_e32 v0, s8 9113; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9114; GFX7-NEXT: v_mov_b32_e32 v3, v0 9115; GFX7-NEXT: v_mov_b32_e32 v0, s6 9116; GFX7-NEXT: v_mov_b32_e32 v1, s7 9117; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9118; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9119; GFX7-NEXT: s_waitcnt vmcnt(0) 9120; GFX7-NEXT: buffer_wbinvl1_vol 9121; GFX7-NEXT: v_mov_b32_e32 v0, s4 9122; GFX7-NEXT: v_mov_b32_e32 v1, s5 9123; GFX7-NEXT: flat_store_dword v[0:1], v2 9124; GFX7-NEXT: s_endpgm 9125; 9126; GFX10-WGP-LABEL: global_system_relese_seq_cst_ret_cmpxchg: 9127; GFX10-WGP: ; %bb.0: ; %entry 9128; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 9129; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9130; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 9131; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 9132; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9133; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 9134; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 9135; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9136; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 9137; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9138; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9139; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9140; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 9141; GFX10-WGP-NEXT: buffer_gl1_inv 9142; GFX10-WGP-NEXT: buffer_gl0_inv 9143; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 9144; GFX10-WGP-NEXT: s_endpgm 9145; 9146; GFX10-CU-LABEL: global_system_relese_seq_cst_ret_cmpxchg: 9147; GFX10-CU: ; %bb.0: ; %entry 9148; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 9149; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9150; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 9151; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 9152; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 9153; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 9154; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 9155; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9156; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 9157; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9158; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 9159; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9160; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 9161; GFX10-CU-NEXT: buffer_gl1_inv 9162; GFX10-CU-NEXT: buffer_gl0_inv 9163; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 9164; GFX10-CU-NEXT: s_endpgm 9165; 9166; SKIP-CACHE-INV-LABEL: global_system_relese_seq_cst_ret_cmpxchg: 9167; SKIP-CACHE-INV: ; %bb.0: ; %entry 9168; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 9169; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 9170; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 9171; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 9172; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 9173; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 9174; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 9175; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 9176; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 9177; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 9178; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 9179; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 9180; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 9181; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 9182; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 9183; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9184; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 9185; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9186; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 9187; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9188; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9189; SKIP-CACHE-INV-NEXT: 
s_waitcnt vmcnt(0) 9190; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 9191; SKIP-CACHE-INV-NEXT: s_endpgm 9192; 9193; GFX90A-NOTTGSPLIT-LABEL: global_system_relese_seq_cst_ret_cmpxchg: 9194; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 9195; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9196; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9197; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9198; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9199; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9200; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9201; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9202; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9203; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9204; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 9205; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9206; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9207; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9208; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 9209; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 9210; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9211; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 9212; 9213; GFX90A-TGSPLIT-LABEL: global_system_relese_seq_cst_ret_cmpxchg: 9214; GFX90A-TGSPLIT: ; %bb.0: ; %entry 9215; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9216; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9217; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9218; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9219; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9220; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9221; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9222; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9223; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9224; GFX90A-TGSPLIT-NEXT: buffer_wbl2 9225; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9226; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] 
offset:16 glc 9227; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9228; GFX90A-TGSPLIT-NEXT: buffer_invl2 9229; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 9230; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9231; GFX90A-TGSPLIT-NEXT: s_endpgm 9232; 9233; GFX940-NOTTGSPLIT-LABEL: global_system_relese_seq_cst_ret_cmpxchg: 9234; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 9235; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9236; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9237; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9238; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9239; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9240; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9241; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9242; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9243; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9244; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 9245; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9246; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 9247; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9248; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 9249; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9250; GFX940-NOTTGSPLIT-NEXT: s_endpgm 9251; 9252; GFX940-TGSPLIT-LABEL: global_system_relese_seq_cst_ret_cmpxchg: 9253; GFX940-TGSPLIT: ; %bb.0: ; %entry 9254; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9255; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9256; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9257; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9258; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9259; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9260; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9261; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9262; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9263; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 9264; GFX940-TGSPLIT-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 9265; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 9266; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9267; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 9268; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9269; GFX940-TGSPLIT-NEXT: s_endpgm 9270; 9271; GFX11-WGP-LABEL: global_system_relese_seq_cst_ret_cmpxchg: 9272; GFX11-WGP: ; %bb.0: ; %entry 9273; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 9274; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9275; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9276; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9277; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 9278; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 9279; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 9280; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9281; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 9282; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9283; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9284; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9285; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 9286; GFX11-WGP-NEXT: buffer_gl1_inv 9287; GFX11-WGP-NEXT: buffer_gl0_inv 9288; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9289; GFX11-WGP-NEXT: s_endpgm 9290; 9291; GFX11-CU-LABEL: global_system_relese_seq_cst_ret_cmpxchg: 9292; GFX11-CU: ; %bb.0: ; %entry 9293; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 9294; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9295; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9296; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9297; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9298; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 9299; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 9300; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9301; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 9302; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9303; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 9304; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9305; 
GFX11-CU-NEXT: s_waitcnt vmcnt(0) 9306; GFX11-CU-NEXT: buffer_gl1_inv 9307; GFX11-CU-NEXT: buffer_gl0_inv 9308; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9309; GFX11-CU-NEXT: s_endpgm 9310; 9311; GFX12-WGP-LABEL: global_system_relese_seq_cst_ret_cmpxchg: 9312; GFX12-WGP: ; %bb.0: ; %entry 9313; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 9314; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9315; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9316; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9317; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9318; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 9319; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 9320; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9321; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 9322; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 9323; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9324; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9325; GFX12-WGP-NEXT: s_wait_storecnt 0x0 9326; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 9327; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9328; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9329; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9330; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 9331; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 9332; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9333; GFX12-WGP-NEXT: s_endpgm 9334; 9335; GFX12-CU-LABEL: global_system_relese_seq_cst_ret_cmpxchg: 9336; GFX12-CU: ; %bb.0: ; %entry 9337; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 9338; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9339; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9340; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9341; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9342; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 9343; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 9344; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9345; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 9346; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 9347; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9348; GFX12-CU-NEXT: 
s_wait_samplecnt 0x0 9349; GFX12-CU-NEXT: s_wait_storecnt 0x0 9350; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 9351; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9352; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9353; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9354; GFX12-CU-NEXT: s_wait_loadcnt 0x0 9355; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 9356; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9357; GFX12-CU-NEXT: s_endpgm 9358 ptr addrspace(1) %out, i32 %in, i32 %old) { 9359entry: 9360 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 9361 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in release seq_cst 9362 %val0 = extractvalue { i32, i1 } %val, 0 9363 store i32 %val0, ptr addrspace(1) %out, align 4 9364 ret void 9365} 9366 9367define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg( 9368; GFX6-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: 9369; GFX6: ; %bb.0: ; %entry 9370; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 9371; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9372; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 9373; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 9374; GFX6-NEXT: s_waitcnt lgkmcnt(0) 9375; GFX6-NEXT: s_mov_b32 s12, s5 9376; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 9377; GFX6-NEXT: s_mov_b32 s10, 0x100f000 9378; GFX6-NEXT: s_mov_b32 s11, -1 9379; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9380; GFX6-NEXT: s_mov_b32 s5, s12 9381; GFX6-NEXT: s_mov_b32 s6, s11 9382; GFX6-NEXT: s_mov_b32 s7, s10 9383; GFX6-NEXT: v_mov_b32_e32 v0, s9 9384; GFX6-NEXT: v_mov_b32_e32 v2, s8 9385; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9386; GFX6-NEXT: v_mov_b32_e32 v1, v2 9387; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9388; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 9389; GFX6-NEXT: s_waitcnt vmcnt(0) 9390; GFX6-NEXT: buffer_wbinvl1 9391; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 
killed $vgpr0_vgpr1 killed $exec 9392; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 9393; GFX6-NEXT: s_endpgm 9394; 9395; GFX7-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: 9396; GFX7: ; %bb.0: ; %entry 9397; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 9398; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9399; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 9400; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 9401; GFX7-NEXT: s_mov_b64 s[12:13], 16 9402; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9403; GFX7-NEXT: s_mov_b32 s6, s4 9404; GFX7-NEXT: s_mov_b32 s7, s5 9405; GFX7-NEXT: s_mov_b32 s11, s12 9406; GFX7-NEXT: s_mov_b32 s10, s13 9407; GFX7-NEXT: s_add_u32 s6, s6, s11 9408; GFX7-NEXT: s_addc_u32 s10, s7, s10 9409; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9410; GFX7-NEXT: s_mov_b32 s7, s10 9411; GFX7-NEXT: v_mov_b32_e32 v2, s9 9412; GFX7-NEXT: v_mov_b32_e32 v0, s8 9413; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9414; GFX7-NEXT: v_mov_b32_e32 v3, v0 9415; GFX7-NEXT: v_mov_b32_e32 v0, s6 9416; GFX7-NEXT: v_mov_b32_e32 v1, s7 9417; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9418; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9419; GFX7-NEXT: s_waitcnt vmcnt(0) 9420; GFX7-NEXT: buffer_wbinvl1_vol 9421; GFX7-NEXT: v_mov_b32_e32 v0, s4 9422; GFX7-NEXT: v_mov_b32_e32 v1, s5 9423; GFX7-NEXT: flat_store_dword v[0:1], v2 9424; GFX7-NEXT: s_endpgm 9425; 9426; GFX10-WGP-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: 9427; GFX10-WGP: ; %bb.0: ; %entry 9428; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 9429; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9430; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 9431; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 9432; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9433; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 9434; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 9435; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9436; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 9437; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 9438; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9439; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9440; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 9441; GFX10-WGP-NEXT: buffer_gl1_inv 9442; GFX10-WGP-NEXT: buffer_gl0_inv 9443; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 9444; GFX10-WGP-NEXT: s_endpgm 9445; 9446; GFX10-CU-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: 9447; GFX10-CU: ; %bb.0: ; %entry 9448; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 9449; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9450; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 9451; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 9452; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 9453; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 9454; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 9455; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9456; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 9457; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9458; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 9459; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9460; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 9461; GFX10-CU-NEXT: buffer_gl1_inv 9462; GFX10-CU-NEXT: buffer_gl0_inv 9463; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 9464; GFX10-CU-NEXT: s_endpgm 9465; 9466; SKIP-CACHE-INV-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: 9467; SKIP-CACHE-INV: ; %bb.0: ; %entry 9468; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 9469; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 9470; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 9471; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 9472; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 9473; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 9474; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 9475; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 9476; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 9477; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 9478; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s1, s8 9479; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 9480; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 9481; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 9482; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 9483; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9484; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 9485; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9486; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 9487; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9488; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9489; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9490; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 9491; SKIP-CACHE-INV-NEXT: s_endpgm 9492; 9493; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: 9494; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 9495; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9496; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9497; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9498; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9499; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9500; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9501; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9502; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9503; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9504; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 9505; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9506; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9507; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9508; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 9509; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 9510; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9511; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 9512; 9513; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: 9514; GFX90A-TGSPLIT: ; %bb.0: ; %entry 9515; GFX90A-TGSPLIT-NEXT: 
v_mov_b32_e32 v0, 0 9516; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9517; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9518; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9519; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9520; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9521; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9522; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9523; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9524; GFX90A-TGSPLIT-NEXT: buffer_wbl2 9525; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9526; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9527; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9528; GFX90A-TGSPLIT-NEXT: buffer_invl2 9529; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 9530; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9531; GFX90A-TGSPLIT-NEXT: s_endpgm 9532; 9533; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: 9534; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 9535; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9536; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9537; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9538; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9539; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9540; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9541; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9542; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9543; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9544; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 9545; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9546; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 9547; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9548; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 9549; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9550; GFX940-NOTTGSPLIT-NEXT: s_endpgm 9551; 9552; GFX940-TGSPLIT-LABEL: 
global_system_acq_rel_seq_cst_ret_cmpxchg: 9553; GFX940-TGSPLIT: ; %bb.0: ; %entry 9554; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9555; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9556; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9557; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9558; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9559; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9560; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9561; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9562; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9563; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 9564; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9565; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 9566; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9567; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 9568; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9569; GFX940-TGSPLIT-NEXT: s_endpgm 9570; 9571; GFX11-WGP-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: 9572; GFX11-WGP: ; %bb.0: ; %entry 9573; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 9574; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9575; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9576; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9577; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 9578; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 9579; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 9580; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9581; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 9582; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9583; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9584; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9585; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 9586; GFX11-WGP-NEXT: buffer_gl1_inv 9587; GFX11-WGP-NEXT: buffer_gl0_inv 9588; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9589; GFX11-WGP-NEXT: s_endpgm 9590; 9591; GFX11-CU-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: 9592; 
GFX11-CU: ; %bb.0: ; %entry 9593; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 9594; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9595; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9596; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9597; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9598; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 9599; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 9600; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9601; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 9602; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9603; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 9604; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9605; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 9606; GFX11-CU-NEXT: buffer_gl1_inv 9607; GFX11-CU-NEXT: buffer_gl0_inv 9608; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9609; GFX11-CU-NEXT: s_endpgm 9610; 9611; GFX12-WGP-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: 9612; GFX12-WGP: ; %bb.0: ; %entry 9613; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 9614; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9615; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9616; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9617; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9618; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 9619; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 9620; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9621; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 9622; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 9623; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9624; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9625; GFX12-WGP-NEXT: s_wait_storecnt 0x0 9626; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 9627; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9628; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9629; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9630; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 9631; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 9632; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9633; GFX12-WGP-NEXT: s_endpgm 9634; 
9635; GFX12-CU-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: 9636; GFX12-CU: ; %bb.0: ; %entry 9637; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 9638; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9639; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9640; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9641; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9642; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 9643; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 9644; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9645; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 9646; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 9647; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9648; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9649; GFX12-CU-NEXT: s_wait_storecnt 0x0 9650; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 9651; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9652; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9653; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9654; GFX12-CU-NEXT: s_wait_loadcnt 0x0 9655; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 9656; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9657; GFX12-CU-NEXT: s_endpgm 9658 ptr addrspace(1) %out, i32 %in, i32 %old) { 9659entry: 9660 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 9661 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel seq_cst 9662 %val0 = extractvalue { i32, i1 } %val, 0 9663 store i32 %val0, ptr addrspace(1) %out, align 4 9664 ret void 9665} 9666 9667define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( 9668; GFX6-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 9669; GFX6: ; %bb.0: ; %entry 9670; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 9671; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9672; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 9673; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 9674; GFX6-NEXT: s_waitcnt lgkmcnt(0) 9675; GFX6-NEXT: s_mov_b32 s12, s5 9676; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 9677; GFX6-NEXT: s_mov_b32 s10, 0x100f000 9678; 
GFX6-NEXT: s_mov_b32 s11, -1 9679; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9680; GFX6-NEXT: s_mov_b32 s5, s12 9681; GFX6-NEXT: s_mov_b32 s6, s11 9682; GFX6-NEXT: s_mov_b32 s7, s10 9683; GFX6-NEXT: v_mov_b32_e32 v0, s9 9684; GFX6-NEXT: v_mov_b32_e32 v2, s8 9685; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9686; GFX6-NEXT: v_mov_b32_e32 v1, v2 9687; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9688; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 9689; GFX6-NEXT: s_waitcnt vmcnt(0) 9690; GFX6-NEXT: buffer_wbinvl1 9691; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9692; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 9693; GFX6-NEXT: s_endpgm 9694; 9695; GFX7-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 9696; GFX7: ; %bb.0: ; %entry 9697; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 9698; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9699; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 9700; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 9701; GFX7-NEXT: s_mov_b64 s[12:13], 16 9702; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9703; GFX7-NEXT: s_mov_b32 s6, s4 9704; GFX7-NEXT: s_mov_b32 s7, s5 9705; GFX7-NEXT: s_mov_b32 s11, s12 9706; GFX7-NEXT: s_mov_b32 s10, s13 9707; GFX7-NEXT: s_add_u32 s6, s6, s11 9708; GFX7-NEXT: s_addc_u32 s10, s7, s10 9709; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9710; GFX7-NEXT: s_mov_b32 s7, s10 9711; GFX7-NEXT: v_mov_b32_e32 v2, s9 9712; GFX7-NEXT: v_mov_b32_e32 v0, s8 9713; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9714; GFX7-NEXT: v_mov_b32_e32 v3, v0 9715; GFX7-NEXT: v_mov_b32_e32 v0, s6 9716; GFX7-NEXT: v_mov_b32_e32 v1, s7 9717; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9718; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9719; GFX7-NEXT: s_waitcnt vmcnt(0) 9720; GFX7-NEXT: buffer_wbinvl1_vol 9721; GFX7-NEXT: v_mov_b32_e32 v0, s4 9722; GFX7-NEXT: v_mov_b32_e32 v1, s5 9723; GFX7-NEXT: 
flat_store_dword v[0:1], v2 9724; GFX7-NEXT: s_endpgm 9725; 9726; GFX10-WGP-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 9727; GFX10-WGP: ; %bb.0: ; %entry 9728; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 9729; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9730; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 9731; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 9732; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9733; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 9734; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 9735; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9736; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 9737; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9738; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9739; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9740; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 9741; GFX10-WGP-NEXT: buffer_gl1_inv 9742; GFX10-WGP-NEXT: buffer_gl0_inv 9743; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 9744; GFX10-WGP-NEXT: s_endpgm 9745; 9746; GFX10-CU-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 9747; GFX10-CU: ; %bb.0: ; %entry 9748; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 9749; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9750; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 9751; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 9752; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 9753; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 9754; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 9755; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9756; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 9757; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9758; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 9759; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9760; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 9761; GFX10-CU-NEXT: buffer_gl1_inv 9762; GFX10-CU-NEXT: buffer_gl0_inv 9763; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 9764; GFX10-CU-NEXT: s_endpgm 9765; 9766; SKIP-CACHE-INV-LABEL: 
global_system_seq_cst_seq_cst_ret_cmpxchg: 9767; SKIP-CACHE-INV: ; %bb.0: ; %entry 9768; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 9769; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 9770; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 9771; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 9772; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 9773; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 9774; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 9775; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 9776; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 9777; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 9778; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 9779; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 9780; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 9781; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 9782; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 9783; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9784; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 9785; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9786; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 9787; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9788; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9789; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9790; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 9791; SKIP-CACHE-INV-NEXT: s_endpgm 9792; 9793; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 9794; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 9795; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9796; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9797; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9798; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9799; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9800; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9801; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9802; GFX90A-NOTTGSPLIT-NEXT: ; 
kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9803; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9804; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 9805; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9806; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9807; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9808; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 9809; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 9810; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9811; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 9812; 9813; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 9814; GFX90A-TGSPLIT: ; %bb.0: ; %entry 9815; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9816; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9817; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9818; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9819; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9820; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9821; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9822; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9823; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9824; GFX90A-TGSPLIT-NEXT: buffer_wbl2 9825; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9826; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9827; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9828; GFX90A-TGSPLIT-NEXT: buffer_invl2 9829; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 9830; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9831; GFX90A-TGSPLIT-NEXT: s_endpgm 9832; 9833; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 9834; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 9835; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9836; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9837; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9838; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9839; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9840; 
GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9841; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9842; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9843; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9844; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 9845; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9846; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 9847; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9848; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 9849; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9850; GFX940-NOTTGSPLIT-NEXT: s_endpgm 9851; 9852; GFX940-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 9853; GFX940-TGSPLIT: ; %bb.0: ; %entry 9854; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9855; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9856; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9857; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9858; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9859; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9860; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9861; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9862; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9863; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 9864; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9865; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 9866; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9867; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 9868; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9869; GFX940-TGSPLIT-NEXT: s_endpgm 9870; 9871; GFX11-WGP-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 9872; GFX11-WGP: ; %bb.0: ; %entry 9873; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 9874; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9875; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9876; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9877; GFX11-WGP-NEXT: s_waitcnt 
lgkmcnt(0) 9878; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 9879; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 9880; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9881; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 9882; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9883; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9884; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9885; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 9886; GFX11-WGP-NEXT: buffer_gl1_inv 9887; GFX11-WGP-NEXT: buffer_gl0_inv 9888; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9889; GFX11-WGP-NEXT: s_endpgm 9890; 9891; GFX11-CU-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 9892; GFX11-CU: ; %bb.0: ; %entry 9893; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 9894; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9895; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9896; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9897; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9898; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 9899; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 9900; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9901; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 9902; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9903; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 9904; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9905; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 9906; GFX11-CU-NEXT: buffer_gl1_inv 9907; GFX11-CU-NEXT: buffer_gl0_inv 9908; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9909; GFX11-CU-NEXT: s_endpgm 9910; 9911; GFX12-WGP-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 9912; GFX12-WGP: ; %bb.0: ; %entry 9913; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 9914; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9915; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9916; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9917; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9918; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 9919; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 9920; GFX12-WGP-NEXT: ; kill: def $vgpr1 
killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9921; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 9922; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 9923; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9924; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9925; GFX12-WGP-NEXT: s_wait_storecnt 0x0 9926; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 9927; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9928; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9929; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9930; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 9931; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 9932; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9933; GFX12-WGP-NEXT: s_endpgm 9934; 9935; GFX12-CU-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 9936; GFX12-CU: ; %bb.0: ; %entry 9937; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 9938; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9939; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9940; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9941; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9942; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 9943; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 9944; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9945; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 9946; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 9947; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9948; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9949; GFX12-CU-NEXT: s_wait_storecnt 0x0 9950; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 9951; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9952; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9953; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9954; GFX12-CU-NEXT: s_wait_loadcnt 0x0 9955; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 9956; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9957; GFX12-CU-NEXT: s_endpgm 9958 ptr addrspace(1) %out, i32 %in, i32 %old) { 9959entry: 9960 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 9961 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in 
seq_cst seq_cst 9962 %val0 = extractvalue { i32, i1 } %val, 0 9963 store i32 %val0, ptr addrspace(1) %out, align 4 9964 ret void 9965} 9966 9967define amdgpu_kernel void @global_system_one_as_unordered_load( 9968; GFX6-LABEL: global_system_one_as_unordered_load: 9969; GFX6: ; %bb.0: ; %entry 9970; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 9971; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 9972; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 9973; GFX6-NEXT: s_waitcnt lgkmcnt(0) 9974; GFX6-NEXT: s_mov_b32 s6, s9 9975; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 9976; GFX6-NEXT: s_mov_b32 s12, 0x100f000 9977; GFX6-NEXT: s_mov_b32 s13, -1 9978; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 9979; GFX6-NEXT: s_mov_b32 s9, s6 9980; GFX6-NEXT: s_mov_b32 s10, s13 9981; GFX6-NEXT: s_mov_b32 s11, s12 9982; GFX6-NEXT: s_mov_b32 s14, s5 9983; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 9984; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9985; GFX6-NEXT: s_mov_b32 s5, s14 9986; GFX6-NEXT: s_mov_b32 s6, s13 9987; GFX6-NEXT: s_mov_b32 s7, s12 9988; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 9989; GFX6-NEXT: s_waitcnt vmcnt(0) 9990; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 9991; GFX6-NEXT: s_endpgm 9992; 9993; GFX7-LABEL: global_system_one_as_unordered_load: 9994; GFX7: ; %bb.0: ; %entry 9995; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 9996; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 9997; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9998; GFX7-NEXT: v_mov_b32_e32 v0, s6 9999; GFX7-NEXT: v_mov_b32_e32 v1, s7 10000; GFX7-NEXT: flat_load_dword v2, v[0:1] 10001; GFX7-NEXT: v_mov_b32_e32 v0, s4 10002; GFX7-NEXT: v_mov_b32_e32 v1, s5 10003; GFX7-NEXT: s_waitcnt vmcnt(0) 10004; GFX7-NEXT: flat_store_dword v[0:1], v2 10005; GFX7-NEXT: s_endpgm 10006; 10007; GFX10-WGP-LABEL: global_system_one_as_unordered_load: 10008; GFX10-WGP: ; %bb.0: ; %entry 10009; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 
10010; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10011; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10012; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10013; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] 10014; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 10015; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10016; GFX10-WGP-NEXT: s_endpgm 10017; 10018; GFX10-CU-LABEL: global_system_one_as_unordered_load: 10019; GFX10-CU: ; %bb.0: ; %entry 10020; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10021; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10022; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10023; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10024; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] 10025; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 10026; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10027; GFX10-CU-NEXT: s_endpgm 10028; 10029; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_load: 10030; SKIP-CACHE-INV: ; %bb.0: ; %entry 10031; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 10032; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 10033; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 10034; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10035; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 10036; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10037; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 10038; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 10039; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10040; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 10041; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 10042; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 10043; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 10044; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10045; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10046; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 10047; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 10048; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 10049; SKIP-CACHE-INV-NEXT: 
buffer_load_dword v0, off, s[4:7], 0 10050; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10051; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10052; SKIP-CACHE-INV-NEXT: s_endpgm 10053; 10054; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_unordered_load: 10055; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10056; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10057; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10058; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10059; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10060; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 10061; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10062; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10063; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10064; 10065; GFX90A-TGSPLIT-LABEL: global_system_one_as_unordered_load: 10066; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10067; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10068; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10069; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10070; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10071; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 10072; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10073; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10074; GFX90A-TGSPLIT-NEXT: s_endpgm 10075; 10076; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_unordered_load: 10077; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10078; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10079; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 10080; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10081; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10082; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 10083; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10084; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10085; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10086; 10087; GFX940-TGSPLIT-LABEL: global_system_one_as_unordered_load: 10088; GFX940-TGSPLIT: ; %bb.0: ; 
%entry 10089; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10090; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 10091; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10092; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10093; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 10094; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10095; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10096; GFX940-TGSPLIT-NEXT: s_endpgm 10097; 10098; GFX11-WGP-LABEL: global_system_one_as_unordered_load: 10099; GFX11-WGP: ; %bb.0: ; %entry 10100; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10101; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10102; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10103; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10104; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 10105; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 10106; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10107; GFX11-WGP-NEXT: s_endpgm 10108; 10109; GFX11-CU-LABEL: global_system_one_as_unordered_load: 10110; GFX11-CU: ; %bb.0: ; %entry 10111; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10112; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10113; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10114; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10115; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] 10116; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 10117; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10118; GFX11-CU-NEXT: s_endpgm 10119; 10120; GFX12-WGP-LABEL: global_system_one_as_unordered_load: 10121; GFX12-WGP: ; %bb.0: ; %entry 10122; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10123; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10124; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10125; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10126; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 10127; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 10128; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10129; GFX12-WGP-NEXT: s_endpgm 10130; 10131; GFX12-CU-LABEL: global_system_one_as_unordered_load: 10132; GFX12-CU: ; %bb.0: ; %entry 10133; 
; Reviewer note: the next `define` below is @global_system_one_as_monotonic_load.
; Its IR body is a syncscope("one-as") monotonic atomic i32 load from
; addrspace(1) %in, followed by a plain store of the loaded value to
; addrspace(1) %out.
; The autogenerated expectations put a cache-policy modifier on the load in
; every run configuration: glc (gfx6, gfx7, the amdpal gfx7 run, gfx90a, gfx11),
; glc dlc (gfx10), sc0 sc1 (gfx940) and scope:SCOPE_SYS (gfx12). No cache
; invalidate is expected after the load.
; These assertions come from utils/update_llc_test_checks.py (see the file
; header); regenerate them with that script rather than editing them by hand.
GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10134; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10135; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10136; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10137; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] 10138; GFX12-CU-NEXT: s_wait_loadcnt 0x0 10139; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10140; GFX12-CU-NEXT: s_endpgm 10141 ptr addrspace(1) %in, ptr addrspace(1) %out) { 10142entry: 10143 %val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") unordered, align 4 10144 store i32 %val, ptr addrspace(1) %out 10145 ret void 10146} 10147 10148define amdgpu_kernel void @global_system_one_as_monotonic_load( 10149; GFX6-LABEL: global_system_one_as_monotonic_load: 10150; GFX6: ; %bb.0: ; %entry 10151; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 10152; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 10153; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 10154; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10155; GFX6-NEXT: s_mov_b32 s6, s9 10156; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 10157; GFX6-NEXT: s_mov_b32 s12, 0x100f000 10158; GFX6-NEXT: s_mov_b32 s13, -1 10159; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 10160; GFX6-NEXT: s_mov_b32 s9, s6 10161; GFX6-NEXT: s_mov_b32 s10, s13 10162; GFX6-NEXT: s_mov_b32 s11, s12 10163; GFX6-NEXT: s_mov_b32 s14, s5 10164; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10165; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10166; GFX6-NEXT: s_mov_b32 s5, s14 10167; GFX6-NEXT: s_mov_b32 s6, s13 10168; GFX6-NEXT: s_mov_b32 s7, s12 10169; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 glc 10170; GFX6-NEXT: s_waitcnt vmcnt(0) 10171; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10172; GFX6-NEXT: s_endpgm 10173; 10174; GFX7-LABEL: global_system_one_as_monotonic_load: 10175; GFX7: ; %bb.0: ; %entry 10176; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10177; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 10178; GFX7-NEXT:
s_waitcnt lgkmcnt(0) 10179; GFX7-NEXT: v_mov_b32_e32 v0, s6 10180; GFX7-NEXT: v_mov_b32_e32 v1, s7 10181; GFX7-NEXT: flat_load_dword v2, v[0:1] glc 10182; GFX7-NEXT: v_mov_b32_e32 v0, s4 10183; GFX7-NEXT: v_mov_b32_e32 v1, s5 10184; GFX7-NEXT: s_waitcnt vmcnt(0) 10185; GFX7-NEXT: flat_store_dword v[0:1], v2 10186; GFX7-NEXT: s_endpgm 10187; 10188; GFX10-WGP-LABEL: global_system_one_as_monotonic_load: 10189; GFX10-WGP: ; %bb.0: ; %entry 10190; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10191; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10192; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10193; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10194; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 10195; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 10196; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10197; GFX10-WGP-NEXT: s_endpgm 10198; 10199; GFX10-CU-LABEL: global_system_one_as_monotonic_load: 10200; GFX10-CU: ; %bb.0: ; %entry 10201; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10202; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10203; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10204; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10205; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 10206; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 10207; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10208; GFX10-CU-NEXT: s_endpgm 10209; 10210; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_load: 10211; SKIP-CACHE-INV: ; %bb.0: ; %entry 10212; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 10213; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 10214; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 10215; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10216; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 10217; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10218; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 10219; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 10220; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10221;
SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 10222; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 10223; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 10224; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 10225; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10226; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10227; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 10228; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 10229; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 10230; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc 10231; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10232; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10233; SKIP-CACHE-INV-NEXT: s_endpgm 10234; 10235; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_load: 10236; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10237; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10238; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10239; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10240; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10241; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 10242; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10243; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10244; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10245; 10246; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_load: 10247; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10248; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10249; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10250; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10251; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10252; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 10253; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10254; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10255; GFX90A-TGSPLIT-NEXT: s_endpgm 10256; 10257; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_load: 10258; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10259; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
10260; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 10261; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10262; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10263; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 10264; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10265; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10266; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10267; 10268; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_load: 10269; GFX940-TGSPLIT: ; %bb.0: ; %entry 10270; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10271; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 10272; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10273; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10274; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 10275; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10276; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10277; GFX940-TGSPLIT-NEXT: s_endpgm 10278; 10279; GFX11-WGP-LABEL: global_system_one_as_monotonic_load: 10280; GFX11-WGP: ; %bb.0: ; %entry 10281; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10282; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10283; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10284; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10285; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] glc 10286; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 10287; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10288; GFX11-WGP-NEXT: s_endpgm 10289; 10290; GFX11-CU-LABEL: global_system_one_as_monotonic_load: 10291; GFX11-CU: ; %bb.0: ; %entry 10292; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10293; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10294; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10295; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10296; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] glc 10297; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 10298; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10299; GFX11-CU-NEXT: s_endpgm 10300; 10301; GFX12-WGP-LABEL:
; Reviewer note: the next `define` below is @global_system_one_as_acquire_load.
; Its IR body is a syncscope("one-as") acquire atomic i32 load from
; addrspace(1) %in, followed by a plain store of the loaded value to
; addrspace(1) %out.
; The autogenerated expectations keep the cache-policy modifier on the load and
; add a cache invalidate after the post-load wait and before the store:
; buffer_wbinvl1 (gfx6), buffer_wbinvl1_vol (gfx7), buffer_gl1_inv then
; buffer_gl0_inv (gfx10, gfx11), buffer_invl2 then buffer_wbinvl1_vol (gfx90a),
; buffer_inv sc0 sc1 (gfx940) and global_inv scope:SCOPE_SYS (gfx12).
; The amdpal run passes -amdgcn-skip-cache-invalidations and expects no
; invalidate instruction.
; These assertions come from utils/update_llc_test_checks.py (see the file
; header); regenerate them with that script rather than editing them by hand.
global_system_one_as_monotonic_load: 10302; GFX12-WGP: ; %bb.0: ; %entry 10303; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10304; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10305; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10306; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10307; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS 10308; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 10309; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10310; GFX12-WGP-NEXT: s_endpgm 10311; 10312; GFX12-CU-LABEL: global_system_one_as_monotonic_load: 10313; GFX12-CU: ; %bb.0: ; %entry 10314; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10315; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10316; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10317; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10318; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS 10319; GFX12-CU-NEXT: s_wait_loadcnt 0x0 10320; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10321; GFX12-CU-NEXT: s_endpgm 10322 ptr addrspace(1) %in, ptr addrspace(1) %out) { 10323entry: 10324 %val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") monotonic, align 4 10325 store i32 %val, ptr addrspace(1) %out 10326 ret void 10327} 10328 10329define amdgpu_kernel void @global_system_one_as_acquire_load( 10330; GFX6-LABEL: global_system_one_as_acquire_load: 10331; GFX6: ; %bb.0: ; %entry 10332; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 10333; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 10334; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 10335; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10336; GFX6-NEXT: s_mov_b32 s6, s9 10337; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 10338; GFX6-NEXT: s_mov_b32 s12, 0x100f000 10339; GFX6-NEXT: s_mov_b32 s13, -1 10340; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 10341; GFX6-NEXT: s_mov_b32 s9, s6 10342; GFX6-NEXT: s_mov_b32 s10, s13 10343; GFX6-NEXT: s_mov_b32 s11, s12 10344; GFX6-NEXT: s_mov_b32 s14, s5 10345; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10346; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10347; GFX6-NEXT: s_mov_b32 s5, s14 10348; GFX6-NEXT: s_mov_b32 s6, s13 10349; GFX6-NEXT: s_mov_b32 s7, s12 10350; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 glc 10351; GFX6-NEXT: s_waitcnt vmcnt(0) 10352; GFX6-NEXT: buffer_wbinvl1 10353; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10354; GFX6-NEXT: s_endpgm 10355; 10356; GFX7-LABEL: global_system_one_as_acquire_load: 10357; GFX7: ; %bb.0: ; %entry 10358; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10359; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 10360; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10361; GFX7-NEXT: v_mov_b32_e32 v0, s6 10362; GFX7-NEXT: v_mov_b32_e32 v1, s7 10363; GFX7-NEXT: flat_load_dword v2, v[0:1] glc 10364; GFX7-NEXT: s_waitcnt vmcnt(0) 10365; GFX7-NEXT: buffer_wbinvl1_vol 10366; GFX7-NEXT: v_mov_b32_e32 v0, s4 10367; GFX7-NEXT: v_mov_b32_e32 v1, s5 10368; GFX7-NEXT: flat_store_dword v[0:1], v2 10369; GFX7-NEXT: s_endpgm 10370; 10371; GFX10-WGP-LABEL: global_system_one_as_acquire_load: 10372; GFX10-WGP: ; %bb.0: ; %entry 10373; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10374; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10375; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10376; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10377; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 10378; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 10379; GFX10-WGP-NEXT: buffer_gl1_inv 10380; GFX10-WGP-NEXT: buffer_gl0_inv 10381; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10382; GFX10-WGP-NEXT: s_endpgm 10383; 10384; GFX10-CU-LABEL: global_system_one_as_acquire_load: 10385; GFX10-CU: ; %bb.0: ; %entry 10386; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10387; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10388; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10389; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10390; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 10391; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 10392; GFX10-CU-NEXT:
buffer_gl1_inv 10393; GFX10-CU-NEXT: buffer_gl0_inv 10394; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10395; GFX10-CU-NEXT: s_endpgm 10396; 10397; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_load: 10398; SKIP-CACHE-INV: ; %bb.0: ; %entry 10399; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 10400; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 10401; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 10402; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10403; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 10404; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10405; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 10406; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 10407; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10408; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 10409; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 10410; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 10411; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 10412; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10413; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10414; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 10415; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 10416; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 10417; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc 10418; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10419; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10420; SKIP-CACHE-INV-NEXT: s_endpgm 10421; 10422; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_load: 10423; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10424; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10425; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10426; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10427; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10428; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 10429; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10430; GFX90A-NOTTGSPLIT-NEXT:
buffer_invl2 10431; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 10432; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10433; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10434; 10435; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_load: 10436; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10437; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10438; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10439; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10440; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10441; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 10442; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10443; GFX90A-TGSPLIT-NEXT: buffer_invl2 10444; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 10445; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10446; GFX90A-TGSPLIT-NEXT: s_endpgm 10447; 10448; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_load: 10449; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10450; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10451; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 10452; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10453; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10454; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 10455; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10456; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 10457; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10458; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10459; 10460; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_load: 10461; GFX940-TGSPLIT: ; %bb.0: ; %entry 10462; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10463; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 10464; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10465; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10466; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 10467; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10468; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 10469; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1]
sc0 sc1 10470; GFX940-TGSPLIT-NEXT: s_endpgm 10471; 10472; GFX11-WGP-LABEL: global_system_one_as_acquire_load: 10473; GFX11-WGP: ; %bb.0: ; %entry 10474; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10475; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10476; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10477; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10478; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] glc 10479; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 10480; GFX11-WGP-NEXT: buffer_gl1_inv 10481; GFX11-WGP-NEXT: buffer_gl0_inv 10482; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10483; GFX11-WGP-NEXT: s_endpgm 10484; 10485; GFX11-CU-LABEL: global_system_one_as_acquire_load: 10486; GFX11-CU: ; %bb.0: ; %entry 10487; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10488; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10489; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10490; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10491; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] glc 10492; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 10493; GFX11-CU-NEXT: buffer_gl1_inv 10494; GFX11-CU-NEXT: buffer_gl0_inv 10495; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10496; GFX11-CU-NEXT: s_endpgm 10497; 10498; GFX12-WGP-LABEL: global_system_one_as_acquire_load: 10499; GFX12-WGP: ; %bb.0: ; %entry 10500; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10501; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10502; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10503; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10504; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS 10505; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 10506; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 10507; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10508; GFX12-WGP-NEXT: s_endpgm 10509; 10510; GFX12-CU-LABEL: global_system_one_as_acquire_load: 10511; GFX12-CU: ; %bb.0: ; %entry 10512; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10513; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10514; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10515; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10516;
; Reviewer note: the next `define` below is @global_system_one_as_seq_cst_load.
; Its IR body is a syncscope("one-as") seq_cst atomic i32 load from
; addrspace(1) %in, followed by a plain store of the loaded value to
; addrspace(1) %out.
; The autogenerated expectations keep the load modifier, the post-load wait and
; the cache invalidate seen in the acquire test, and add a wait in front of the
; load: s_waitcnt vmcnt(0) (gfx6, gfx7, the amdpal gfx7 run), s_waitcnt vmcnt(0)
; lgkmcnt(0) (gfx90a, gfx940), that same wait plus s_waitcnt_vscnt null, 0x0
; (gfx10, gfx11), and on gfx12 s_wait_bvhcnt / s_wait_samplecnt /
; s_wait_loadcnt / s_wait_storecnt / s_wait_kmcnt 0x0. On gfx12 the wait after
; the load is s_wait_bvhcnt / s_wait_samplecnt / s_wait_loadcnt 0x0.
; These assertions come from utils/update_llc_test_checks.py (see the file
; header); regenerate them with that script rather than editing them by hand.
GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS 10517; GFX12-CU-NEXT: s_wait_loadcnt 0x0 10518; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 10519; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10520; GFX12-CU-NEXT: s_endpgm 10521 ptr addrspace(1) %in, ptr addrspace(1) %out) { 10522entry: 10523 %val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") acquire, align 4 10524 store i32 %val, ptr addrspace(1) %out 10525 ret void 10526} 10527 10528define amdgpu_kernel void @global_system_one_as_seq_cst_load( 10529; GFX6-LABEL: global_system_one_as_seq_cst_load: 10530; GFX6: ; %bb.0: ; %entry 10531; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 10532; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 10533; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 10534; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10535; GFX6-NEXT: s_mov_b32 s6, s9 10536; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 10537; GFX6-NEXT: s_mov_b32 s12, 0x100f000 10538; GFX6-NEXT: s_mov_b32 s13, -1 10539; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 10540; GFX6-NEXT: s_mov_b32 s9, s6 10541; GFX6-NEXT: s_mov_b32 s10, s13 10542; GFX6-NEXT: s_mov_b32 s11, s12 10543; GFX6-NEXT: s_mov_b32 s14, s5 10544; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10545; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10546; GFX6-NEXT: s_mov_b32 s5, s14 10547; GFX6-NEXT: s_mov_b32 s6, s13 10548; GFX6-NEXT: s_mov_b32 s7, s12 10549; GFX6-NEXT: s_waitcnt vmcnt(0) 10550; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 glc 10551; GFX6-NEXT: s_waitcnt vmcnt(0) 10552; GFX6-NEXT: buffer_wbinvl1 10553; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10554; GFX6-NEXT: s_endpgm 10555; 10556; GFX7-LABEL: global_system_one_as_seq_cst_load: 10557; GFX7: ; %bb.0: ; %entry 10558; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10559; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 10560; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10561; GFX7-NEXT: v_mov_b32_e32 v0, s6
10562; GFX7-NEXT: v_mov_b32_e32 v1, s7 10563; GFX7-NEXT: s_waitcnt vmcnt(0) 10564; GFX7-NEXT: flat_load_dword v2, v[0:1] glc 10565; GFX7-NEXT: s_waitcnt vmcnt(0) 10566; GFX7-NEXT: buffer_wbinvl1_vol 10567; GFX7-NEXT: v_mov_b32_e32 v0, s4 10568; GFX7-NEXT: v_mov_b32_e32 v1, s5 10569; GFX7-NEXT: flat_store_dword v[0:1], v2 10570; GFX7-NEXT: s_endpgm 10571; 10572; GFX10-WGP-LABEL: global_system_one_as_seq_cst_load: 10573; GFX10-WGP: ; %bb.0: ; %entry 10574; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10575; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10576; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10577; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10578; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10579; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 10580; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 10581; GFX10-WGP-NEXT: buffer_gl1_inv 10582; GFX10-WGP-NEXT: buffer_gl0_inv 10583; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10584; GFX10-WGP-NEXT: s_endpgm 10585; 10586; GFX10-CU-LABEL: global_system_one_as_seq_cst_load: 10587; GFX10-CU: ; %bb.0: ; %entry 10588; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10589; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10590; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10591; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10592; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 10593; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] glc dlc 10594; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 10595; GFX10-CU-NEXT: buffer_gl1_inv 10596; GFX10-CU-NEXT: buffer_gl0_inv 10597; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10598; GFX10-CU-NEXT: s_endpgm 10599; 10600; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_load: 10601; SKIP-CACHE-INV: ; %bb.0: ; %entry 10602; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 10603; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 10604; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 10605; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10606; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 10607;
SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10608; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 10609; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 10610; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10611; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 10612; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 10613; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 10614; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 10615; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10616; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10617; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 10618; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 10619; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 10620; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10621; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc 10622; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10623; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10624; SKIP-CACHE-INV-NEXT: s_endpgm 10625; 10626; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_load: 10627; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10628; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10629; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10630; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10631; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10632; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 10633; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10634; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 10635; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 10636; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10637; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10638; 10639; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_load: 10640; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10641; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10642; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10643; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10644;
GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10645; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] glc 10646; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10647; GFX90A-TGSPLIT-NEXT: buffer_invl2 10648; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 10649; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10650; GFX90A-TGSPLIT-NEXT: s_endpgm 10651; 10652; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_load: 10653; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10654; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10655; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 10656; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10657; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10658; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 10659; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10660; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 10661; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10662; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10663; 10664; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_load: 10665; GFX940-TGSPLIT: ; %bb.0: ; %entry 10666; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10667; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 10668; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10669; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10670; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 10671; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10672; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 10673; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10674; GFX940-TGSPLIT-NEXT: s_endpgm 10675; 10676; GFX11-WGP-LABEL: global_system_one_as_seq_cst_load: 10677; GFX11-WGP: ; %bb.0: ; %entry 10678; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10679; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10680; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10681; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10682; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10683; GFX11-WGP-NEXT:
global_load_b32 v1, v0, s[2:3] glc 10684; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 10685; GFX11-WGP-NEXT: buffer_gl1_inv 10686; GFX11-WGP-NEXT: buffer_gl0_inv 10687; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10688; GFX11-WGP-NEXT: s_endpgm 10689; 10690; GFX11-CU-LABEL: global_system_one_as_seq_cst_load: 10691; GFX11-CU: ; %bb.0: ; %entry 10692; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10693; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10694; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10695; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10696; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 10697; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] glc 10698; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 10699; GFX11-CU-NEXT: buffer_gl1_inv 10700; GFX11-CU-NEXT: buffer_gl0_inv 10701; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10702; GFX11-CU-NEXT: s_endpgm 10703; 10704; GFX12-WGP-LABEL: global_system_one_as_seq_cst_load: 10705; GFX12-WGP: ; %bb.0: ; %entry 10706; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10707; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10708; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10709; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 10710; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 10711; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 10712; GFX12-WGP-NEXT: s_wait_storecnt 0x0 10713; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10714; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS 10715; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 10716; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 10717; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 10718; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 10719; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10720; GFX12-WGP-NEXT: s_endpgm 10721; 10722; GFX12-CU-LABEL: global_system_one_as_seq_cst_load: 10723; GFX12-CU: ; %bb.0: ; %entry 10724; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10725; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10726; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10727; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 10728; GFX12-CU-NEXT: s_wait_samplecnt 0x0 10729; GFX12-CU-NEXT:
s_wait_loadcnt 0x0 10730; GFX12-CU-NEXT: s_wait_storecnt 0x0 10731; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10732; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS 10733; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 10734; GFX12-CU-NEXT: s_wait_samplecnt 0x0 10735; GFX12-CU-NEXT: s_wait_loadcnt 0x0 10736; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 10737; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10738; GFX12-CU-NEXT: s_endpgm 10739 ptr addrspace(1) %in, ptr addrspace(1) %out) { 10740entry: 10741 %val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") seq_cst, align 4 10742 store i32 %val, ptr addrspace(1) %out 10743 ret void 10744} 10745 10746define amdgpu_kernel void @global_system_one_as_unordered_store( 10747; GFX6-LABEL: global_system_one_as_unordered_store: 10748; GFX6: ; %bb.0: ; %entry 10749; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 10750; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 10751; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 10752; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10753; GFX6-NEXT: s_mov_b32 s11, s5 10754; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10755; GFX6-NEXT: s_mov_b32 s9, 0x100f000 10756; GFX6-NEXT: s_mov_b32 s10, -1 10757; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10758; GFX6-NEXT: s_mov_b32 s5, s11 10759; GFX6-NEXT: s_mov_b32 s6, s10 10760; GFX6-NEXT: s_mov_b32 s7, s9 10761; GFX6-NEXT: v_mov_b32_e32 v0, s8 10762; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10763; GFX6-NEXT: s_endpgm 10764; 10765; GFX7-LABEL: global_system_one_as_unordered_store: 10766; GFX7: ; %bb.0: ; %entry 10767; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 10768; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 10769; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10770; GFX7-NEXT: v_mov_b32_e32 v0, s6 10771; GFX7-NEXT: v_mov_b32_e32 v1, s7 10772; GFX7-NEXT: v_mov_b32_e32 v2, s4 10773; GFX7-NEXT: flat_store_dword v[0:1], v2 10774; GFX7-NEXT: s_endpgm 10775; 10776; GFX10-WGP-LABEL: global_system_one_as_unordered_store: 10777; GFX10-WGP: ;
%bb.0: ; %entry 10778; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 10779; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10780; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10781; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10782; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 10783; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10784; GFX10-WGP-NEXT: s_endpgm 10785; 10786; GFX10-CU-LABEL: global_system_one_as_unordered_store: 10787; GFX10-CU: ; %bb.0: ; %entry 10788; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 10789; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10790; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10791; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10792; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 10793; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10794; GFX10-CU-NEXT: s_endpgm 10795; 10796; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_store: 10797; SKIP-CACHE-INV: ; %bb.0: ; %entry 10798; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 10799; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 10800; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 10801; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10802; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 10803; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10804; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 10805; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 10806; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10807; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 10808; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 10809; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 10810; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 10811; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10812; SKIP-CACHE-INV-NEXT: s_endpgm 10813; 10814; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_unordered_store: 10815; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10816; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 10817; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10818; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 
v0, 0 10819; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10820; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10821; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10822; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10823; 10824; GFX90A-TGSPLIT-LABEL: global_system_one_as_unordered_store: 10825; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10826; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 10827; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10828; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10829; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10830; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10831; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10832; GFX90A-TGSPLIT-NEXT: s_endpgm 10833; 10834; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_unordered_store: 10835; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10836; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 10837; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10838; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10839; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10840; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10841; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10842; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10843; 10844; GFX940-TGSPLIT-LABEL: global_system_one_as_unordered_store: 10845; GFX940-TGSPLIT: ; %bb.0: ; %entry 10846; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 10847; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10848; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10849; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10850; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10851; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10852; GFX940-TGSPLIT-NEXT: s_endpgm 10853; 10854; GFX11-WGP-LABEL: global_system_one_as_unordered_store: 10855; GFX11-WGP: ; %bb.0: ; %entry 10856; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 10857; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10858; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10859; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 
10860; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 10861; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10862; GFX11-WGP-NEXT: s_endpgm 10863; 10864; GFX11-CU-LABEL: global_system_one_as_unordered_store: 10865; GFX11-CU: ; %bb.0: ; %entry 10866; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 10867; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10868; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10869; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10870; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 10871; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10872; GFX11-CU-NEXT: s_endpgm 10873; 10874; GFX12-WGP-LABEL: global_system_one_as_unordered_store: 10875; GFX12-WGP: ; %bb.0: ; %entry 10876; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 10877; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10878; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10879; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10880; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 10881; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10882; GFX12-WGP-NEXT: s_endpgm 10883; 10884; GFX12-CU-LABEL: global_system_one_as_unordered_store: 10885; GFX12-CU: ; %bb.0: ; %entry 10886; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 10887; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10888; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10889; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10890; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 10891; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10892; GFX12-CU-NEXT: s_endpgm 10893 i32 %in, ptr addrspace(1) %out) { 10894entry: 10895 store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") unordered, align 4 10896 ret void 10897} 10898 10899define amdgpu_kernel void @global_system_one_as_monotonic_store( 10900; GFX6-LABEL: global_system_one_as_monotonic_store: 10901; GFX6: ; %bb.0: ; %entry 10902; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 10903; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 10904; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 10905; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10906; GFX6-NEXT: s_mov_b32 s11, s5 10907; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed 
$sgpr4_sgpr5 10908; GFX6-NEXT: s_mov_b32 s9, 0x100f000 10909; GFX6-NEXT: s_mov_b32 s10, -1 10910; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10911; GFX6-NEXT: s_mov_b32 s5, s11 10912; GFX6-NEXT: s_mov_b32 s6, s10 10913; GFX6-NEXT: s_mov_b32 s7, s9 10914; GFX6-NEXT: v_mov_b32_e32 v0, s8 10915; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10916; GFX6-NEXT: s_endpgm 10917; 10918; GFX7-LABEL: global_system_one_as_monotonic_store: 10919; GFX7: ; %bb.0: ; %entry 10920; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 10921; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 10922; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10923; GFX7-NEXT: v_mov_b32_e32 v0, s6 10924; GFX7-NEXT: v_mov_b32_e32 v1, s7 10925; GFX7-NEXT: v_mov_b32_e32 v2, s4 10926; GFX7-NEXT: flat_store_dword v[0:1], v2 10927; GFX7-NEXT: s_endpgm 10928; 10929; GFX10-WGP-LABEL: global_system_one_as_monotonic_store: 10930; GFX10-WGP: ; %bb.0: ; %entry 10931; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 10932; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10933; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10934; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10935; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 10936; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10937; GFX10-WGP-NEXT: s_endpgm 10938; 10939; GFX10-CU-LABEL: global_system_one_as_monotonic_store: 10940; GFX10-CU: ; %bb.0: ; %entry 10941; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 10942; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10943; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10944; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10945; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 10946; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10947; GFX10-CU-NEXT: s_endpgm 10948; 10949; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_store: 10950; SKIP-CACHE-INV: ; %bb.0: ; %entry 10951; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 10952; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 10953; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 10954; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10955; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 10956; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10957; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 10958; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 10959; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10960; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 10961; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 10962; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 10963; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 10964; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10965; SKIP-CACHE-INV-NEXT: s_endpgm 10966; 10967; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_store: 10968; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10969; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 10970; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10971; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10972; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10973; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10974; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10975; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10976; 10977; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_store: 10978; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10979; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 10980; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10981; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10982; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10983; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10984; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10985; GFX90A-TGSPLIT-NEXT: s_endpgm 10986; 10987; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_store: 10988; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10989; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 10990; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10991; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10992; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10993; 
GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10994; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10995; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10996; 10997; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_store: 10998; GFX940-TGSPLIT: ; %bb.0: ; %entry 10999; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 11000; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11001; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11002; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11003; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11004; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11005; GFX940-TGSPLIT-NEXT: s_endpgm 11006; 11007; GFX11-WGP-LABEL: global_system_one_as_monotonic_store: 11008; GFX11-WGP: ; %bb.0: ; %entry 11009; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 11010; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11011; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11012; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11013; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 11014; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11015; GFX11-WGP-NEXT: s_endpgm 11016; 11017; GFX11-CU-LABEL: global_system_one_as_monotonic_store: 11018; GFX11-CU: ; %bb.0: ; %entry 11019; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 11020; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11021; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11022; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11023; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 11024; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11025; GFX11-CU-NEXT: s_endpgm 11026; 11027; GFX12-WGP-LABEL: global_system_one_as_monotonic_store: 11028; GFX12-WGP: ; %bb.0: ; %entry 11029; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 11030; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11031; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11032; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11033; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 11034; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS 11035; GFX12-WGP-NEXT: s_endpgm 11036; 11037; GFX12-CU-LABEL: global_system_one_as_monotonic_store: 
11038; GFX12-CU: ; %bb.0: ; %entry 11039; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 11040; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11041; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11042; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11043; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 11044; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS 11045; GFX12-CU-NEXT: s_endpgm 11046 i32 %in, ptr addrspace(1) %out) { 11047entry: 11048 store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") monotonic, align 4 11049 ret void 11050} 11051 11052define amdgpu_kernel void @global_system_one_as_release_store( 11053; GFX6-LABEL: global_system_one_as_release_store: 11054; GFX6: ; %bb.0: ; %entry 11055; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 11056; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 11057; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 11058; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11059; GFX6-NEXT: s_mov_b32 s11, s5 11060; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11061; GFX6-NEXT: s_mov_b32 s9, 0x100f000 11062; GFX6-NEXT: s_mov_b32 s10, -1 11063; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11064; GFX6-NEXT: s_mov_b32 s5, s11 11065; GFX6-NEXT: s_mov_b32 s6, s10 11066; GFX6-NEXT: s_mov_b32 s7, s9 11067; GFX6-NEXT: v_mov_b32_e32 v0, s8 11068; GFX6-NEXT: s_waitcnt vmcnt(0) 11069; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 11070; GFX6-NEXT: s_endpgm 11071; 11072; GFX7-LABEL: global_system_one_as_release_store: 11073; GFX7: ; %bb.0: ; %entry 11074; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 11075; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 11076; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11077; GFX7-NEXT: v_mov_b32_e32 v0, s6 11078; GFX7-NEXT: v_mov_b32_e32 v1, s7 11079; GFX7-NEXT: v_mov_b32_e32 v2, s4 11080; GFX7-NEXT: s_waitcnt vmcnt(0) 11081; GFX7-NEXT: flat_store_dword v[0:1], v2 11082; GFX7-NEXT: s_endpgm 11083; 11084; GFX10-WGP-LABEL: global_system_one_as_release_store: 11085; GFX10-WGP: ; %bb.0: ; %entry 11086; GFX10-WGP-NEXT: s_load_dword s6, 
s[8:9], 0x0 11087; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11088; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11089; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11090; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 11091; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 11092; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 11093; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 11094; GFX10-WGP-NEXT: s_endpgm 11095; 11096; GFX10-CU-LABEL: global_system_one_as_release_store: 11097; GFX10-CU: ; %bb.0: ; %entry 11098; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 11099; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11100; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11101; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11102; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 11103; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 11104; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 11105; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 11106; GFX10-CU-NEXT: s_endpgm 11107; 11108; SKIP-CACHE-INV-LABEL: global_system_one_as_release_store: 11109; SKIP-CACHE-INV: ; %bb.0: ; %entry 11110; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 11111; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 11112; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 11113; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11114; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 11115; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11116; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 11117; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 11118; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11119; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 11120; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 11121; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 11122; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 11123; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 11124; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 11125; SKIP-CACHE-INV-NEXT: s_endpgm 11126; 11127; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_store: 11128; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11129; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 11130; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11131; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11132; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11133; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11134; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 11135; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11136; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11137; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11138; 11139; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_store: 11140; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11141; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 11142; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11143; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11144; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11145; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11146; GFX90A-TGSPLIT-NEXT: buffer_wbl2 11147; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11148; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11149; GFX90A-TGSPLIT-NEXT: s_endpgm 11150; 11151; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_store: 11152; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11153; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 11154; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11155; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11156; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11157; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11158; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 11159; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11160; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11161; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11162; 11163; GFX940-TGSPLIT-LABEL: global_system_one_as_release_store: 11164; GFX940-TGSPLIT: ; %bb.0: ; %entry 11165; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 11166; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11167; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11168; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11169; 
GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11170; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 11171; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11172; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11173; GFX940-TGSPLIT-NEXT: s_endpgm 11174; 11175; GFX11-WGP-LABEL: global_system_one_as_release_store: 11176; GFX11-WGP: ; %bb.0: ; %entry 11177; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 11178; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11179; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11180; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11181; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 11182; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 11183; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 11184; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11185; GFX11-WGP-NEXT: s_endpgm 11186; 11187; GFX11-CU-LABEL: global_system_one_as_release_store: 11188; GFX11-CU: ; %bb.0: ; %entry 11189; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 11190; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11191; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11192; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11193; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 11194; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 11195; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 11196; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11197; GFX11-CU-NEXT: s_endpgm 11198; 11199; GFX12-WGP-LABEL: global_system_one_as_release_store: 11200; GFX12-WGP: ; %bb.0: ; %entry 11201; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 11202; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11203; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11204; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11205; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 11206; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 11207; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 11208; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 11209; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 11210; GFX12-WGP-NEXT: s_wait_storecnt 0x0 11211; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS 11212; GFX12-WGP-NEXT: s_endpgm 11213; 11214; GFX12-CU-LABEL: global_system_one_as_release_store: 11215; 
GFX12-CU: ; %bb.0: ; %entry 11216; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 11217; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11218; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11219; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11220; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 11221; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 11222; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 11223; GFX12-CU-NEXT: s_wait_samplecnt 0x0 11224; GFX12-CU-NEXT: s_wait_loadcnt 0x0 11225; GFX12-CU-NEXT: s_wait_storecnt 0x0 11226; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS 11227; GFX12-CU-NEXT: s_endpgm 11228 i32 %in, ptr addrspace(1) %out) { 11229entry: 11230 store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") release, align 4 11231 ret void 11232} 11233 11234define amdgpu_kernel void @global_system_one_as_seq_cst_store( 11235; GFX6-LABEL: global_system_one_as_seq_cst_store: 11236; GFX6: ; %bb.0: ; %entry 11237; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 11238; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 11239; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 11240; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11241; GFX6-NEXT: s_mov_b32 s11, s5 11242; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11243; GFX6-NEXT: s_mov_b32 s9, 0x100f000 11244; GFX6-NEXT: s_mov_b32 s10, -1 11245; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11246; GFX6-NEXT: s_mov_b32 s5, s11 11247; GFX6-NEXT: s_mov_b32 s6, s10 11248; GFX6-NEXT: s_mov_b32 s7, s9 11249; GFX6-NEXT: v_mov_b32_e32 v0, s8 11250; GFX6-NEXT: s_waitcnt vmcnt(0) 11251; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 11252; GFX6-NEXT: s_endpgm 11253; 11254; GFX7-LABEL: global_system_one_as_seq_cst_store: 11255; GFX7: ; %bb.0: ; %entry 11256; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 11257; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 11258; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11259; GFX7-NEXT: v_mov_b32_e32 v0, s6 11260; GFX7-NEXT: v_mov_b32_e32 v1, s7 11261; GFX7-NEXT: v_mov_b32_e32 v2, s4 11262; GFX7-NEXT: s_waitcnt vmcnt(0) 11263; 
GFX7-NEXT: flat_store_dword v[0:1], v2 11264; GFX7-NEXT: s_endpgm 11265; 11266; GFX10-WGP-LABEL: global_system_one_as_seq_cst_store: 11267; GFX10-WGP: ; %bb.0: ; %entry 11268; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 11269; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11270; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11271; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11272; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 11273; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 11274; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 11275; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 11276; GFX10-WGP-NEXT: s_endpgm 11277; 11278; GFX10-CU-LABEL: global_system_one_as_seq_cst_store: 11279; GFX10-CU: ; %bb.0: ; %entry 11280; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 11281; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11282; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11283; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11284; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 11285; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 11286; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 11287; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 11288; GFX10-CU-NEXT: s_endpgm 11289; 11290; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_store: 11291; SKIP-CACHE-INV: ; %bb.0: ; %entry 11292; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 11293; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 11294; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 11295; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11296; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 11297; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11298; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 11299; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 11300; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11301; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 11302; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 11303; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 11304; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 11305; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 11306; 
SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 11307; SKIP-CACHE-INV-NEXT: s_endpgm 11308; 11309; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_store: 11310; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11311; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 11312; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11313; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11314; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11315; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11316; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 11317; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11318; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11319; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11320; 11321; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_store: 11322; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11323; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 11324; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11325; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11326; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11327; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11328; GFX90A-TGSPLIT-NEXT: buffer_wbl2 11329; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11330; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11331; GFX90A-TGSPLIT-NEXT: s_endpgm 11332; 11333; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_store: 11334; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11335; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 11336; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11337; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11338; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11339; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11340; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 11341; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11342; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11343; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11344; 11345; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_store: 11346; GFX940-TGSPLIT: ; %bb.0: ; %entry 11347; 
GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 11348; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11349; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11350; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11351; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11352; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 11353; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11354; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11355; GFX940-TGSPLIT-NEXT: s_endpgm 11356; 11357; GFX11-WGP-LABEL: global_system_one_as_seq_cst_store: 11358; GFX11-WGP: ; %bb.0: ; %entry 11359; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 11360; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11361; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11362; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11363; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 11364; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 11365; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 11366; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11367; GFX11-WGP-NEXT: s_endpgm 11368; 11369; GFX11-CU-LABEL: global_system_one_as_seq_cst_store: 11370; GFX11-CU: ; %bb.0: ; %entry 11371; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 11372; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11373; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11374; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11375; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 11376; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 11377; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 11378; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11379; GFX11-CU-NEXT: s_endpgm 11380; 11381; GFX12-WGP-LABEL: global_system_one_as_seq_cst_store: 11382; GFX12-WGP: ; %bb.0: ; %entry 11383; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 11384; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11385; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11386; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11387; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 11388; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 11389; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 11390; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 11391; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 11392; 
GFX12-WGP-NEXT: s_wait_storecnt 0x0 11393; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS 11394; GFX12-WGP-NEXT: s_endpgm 11395; 11396; GFX12-CU-LABEL: global_system_one_as_seq_cst_store: 11397; GFX12-CU: ; %bb.0: ; %entry 11398; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 11399; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11400; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11401; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11402; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 11403; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 11404; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 11405; GFX12-CU-NEXT: s_wait_samplecnt 0x0 11406; GFX12-CU-NEXT: s_wait_loadcnt 0x0 11407; GFX12-CU-NEXT: s_wait_storecnt 0x0 11408; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS 11409; GFX12-CU-NEXT: s_endpgm 11410 i32 %in, ptr addrspace(1) %out) { 11411entry: 11412 store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") seq_cst, align 4 11413 ret void 11414} 11415 11416define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw( 11417; GFX6-LABEL: global_system_one_as_monotonic_atomicrmw: 11418; GFX6: ; %bb.0: ; %entry 11419; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11420; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 11421; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11422; GFX6-NEXT: s_mov_b32 s11, s5 11423; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11424; GFX6-NEXT: s_mov_b32 s9, 0x100f000 11425; GFX6-NEXT: s_mov_b32 s10, -1 11426; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11427; GFX6-NEXT: s_mov_b32 s5, s11 11428; GFX6-NEXT: s_mov_b32 s6, s10 11429; GFX6-NEXT: s_mov_b32 s7, s9 11430; GFX6-NEXT: v_mov_b32_e32 v0, s8 11431; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 11432; GFX6-NEXT: s_endpgm 11433; 11434; GFX7-LABEL: global_system_one_as_monotonic_atomicrmw: 11435; GFX7: ; %bb.0: ; %entry 11436; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11437; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 11438; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11439; 
GFX7-NEXT: v_mov_b32_e32 v0, s6 11440; GFX7-NEXT: v_mov_b32_e32 v1, s7 11441; GFX7-NEXT: v_mov_b32_e32 v2, s4 11442; GFX7-NEXT: flat_atomic_swap v[0:1], v2 11443; GFX7-NEXT: s_endpgm 11444; 11445; GFX10-WGP-LABEL: global_system_one_as_monotonic_atomicrmw: 11446; GFX10-WGP: ; %bb.0: ; %entry 11447; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11448; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11449; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 11450; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11451; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 11452; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 11453; GFX10-WGP-NEXT: s_endpgm 11454; 11455; GFX10-CU-LABEL: global_system_one_as_monotonic_atomicrmw: 11456; GFX10-CU: ; %bb.0: ; %entry 11457; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11458; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11459; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 11460; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11461; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 11462; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 11463; GFX10-CU-NEXT: s_endpgm 11464; 11465; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_atomicrmw: 11466; SKIP-CACHE-INV: ; %bb.0: ; %entry 11467; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11468; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 11469; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11470; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 11471; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11472; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 11473; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 11474; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11475; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 11476; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 11477; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 11478; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 11479; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 11480; SKIP-CACHE-INV-NEXT: s_endpgm 11481; 11482; GFX90A-NOTTGSPLIT-LABEL: 
global_system_one_as_monotonic_atomicrmw: 11483; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11484; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11485; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11486; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11487; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11488; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11489; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11490; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11491; 11492; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_atomicrmw: 11493; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11494; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11495; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11496; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11497; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11498; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11499; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11500; GFX90A-TGSPLIT-NEXT: s_endpgm 11501; 11502; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_atomicrmw: 11503; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11504; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11505; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11506; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11507; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11508; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11509; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 11510; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11511; 11512; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_atomicrmw: 11513; GFX940-TGSPLIT: ; %bb.0: ; %entry 11514; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11515; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11516; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11517; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11518; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11519; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 11520; GFX940-TGSPLIT-NEXT: s_endpgm 11521; 11522; GFX11-WGP-LABEL: 
global_system_one_as_monotonic_atomicrmw: 11523; GFX11-WGP: ; %bb.0: ; %entry 11524; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11525; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11526; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11527; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11528; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 11529; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11530; GFX11-WGP-NEXT: s_endpgm 11531; 11532; GFX11-CU-LABEL: global_system_one_as_monotonic_atomicrmw: 11533; GFX11-CU: ; %bb.0: ; %entry 11534; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11535; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11536; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11537; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11538; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 11539; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11540; GFX11-CU-NEXT: s_endpgm 11541; 11542; GFX12-WGP-LABEL: global_system_one_as_monotonic_atomicrmw: 11543; GFX12-WGP: ; %bb.0: ; %entry 11544; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11545; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11546; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11547; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11548; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 11549; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 11550; GFX12-WGP-NEXT: s_endpgm 11551; 11552; GFX12-CU-LABEL: global_system_one_as_monotonic_atomicrmw: 11553; GFX12-CU: ; %bb.0: ; %entry 11554; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11555; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11556; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11557; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11558; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 11559; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 11560; GFX12-CU-NEXT: s_endpgm 11561 ptr addrspace(1) %out, i32 %in) { 11562entry: 11563 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") monotonic 11564 ret void 11565} 11566; Volatile xchg atomicrmw on ptr addrspace(1) with syncscope("one-as") and acquire ordering; the result %val is unused. Every check block for this function expects a wait after the swap, and all but the SKIP-CACHE-INV block also expect cache-invalidate instructions (e.g. buffer_wbinvl1, buffer_gl0_inv, global_inv) after that wait. The checks are autogenerated, so regenerate them with utils/update_llc_test_checks.py instead of editing by hand. 11567define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( 11568; GFX6-LABEL: 
global_system_one_as_acquire_atomicrmw: 11569; GFX6: ; %bb.0: ; %entry 11570; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11571; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 11572; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11573; GFX6-NEXT: s_mov_b32 s11, s5 11574; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11575; GFX6-NEXT: s_mov_b32 s9, 0x100f000 11576; GFX6-NEXT: s_mov_b32 s10, -1 11577; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11578; GFX6-NEXT: s_mov_b32 s5, s11 11579; GFX6-NEXT: s_mov_b32 s6, s10 11580; GFX6-NEXT: s_mov_b32 s7, s9 11581; GFX6-NEXT: v_mov_b32_e32 v0, s8 11582; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 11583; GFX6-NEXT: s_waitcnt vmcnt(0) 11584; GFX6-NEXT: buffer_wbinvl1 11585; GFX6-NEXT: s_endpgm 11586; 11587; GFX7-LABEL: global_system_one_as_acquire_atomicrmw: 11588; GFX7: ; %bb.0: ; %entry 11589; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11590; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 11591; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11592; GFX7-NEXT: v_mov_b32_e32 v0, s6 11593; GFX7-NEXT: v_mov_b32_e32 v1, s7 11594; GFX7-NEXT: v_mov_b32_e32 v2, s4 11595; GFX7-NEXT: flat_atomic_swap v[0:1], v2 11596; GFX7-NEXT: s_waitcnt vmcnt(0) 11597; GFX7-NEXT: buffer_wbinvl1_vol 11598; GFX7-NEXT: s_endpgm 11599; 11600; GFX10-WGP-LABEL: global_system_one_as_acquire_atomicrmw: 11601; GFX10-WGP: ; %bb.0: ; %entry 11602; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11603; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11604; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 11605; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11606; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 11607; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 11608; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 11609; GFX10-WGP-NEXT: buffer_gl1_inv 11610; GFX10-WGP-NEXT: buffer_gl0_inv 11611; GFX10-WGP-NEXT: s_endpgm 11612; 11613; GFX10-CU-LABEL: global_system_one_as_acquire_atomicrmw: 11614; GFX10-CU: ; %bb.0: ; %entry 11615; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11616; 
GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11617; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 11618; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11619; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 11620; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 11621; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 11622; GFX10-CU-NEXT: buffer_gl1_inv 11623; GFX10-CU-NEXT: buffer_gl0_inv 11624; GFX10-CU-NEXT: s_endpgm 11625; 11626; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_atomicrmw: 11627; SKIP-CACHE-INV: ; %bb.0: ; %entry 11628; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11629; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 11630; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11631; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 11632; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11633; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 11634; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 11635; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11636; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 11637; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 11638; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 11639; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 11640; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 11641; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 11642; SKIP-CACHE-INV-NEXT: s_endpgm 11643; 11644; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw: 11645; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11646; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11647; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11648; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11649; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11650; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11651; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11652; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11653; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 11654; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 11655; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11656; 11657; 
GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw: 11658; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11659; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11660; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11661; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11662; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11663; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11664; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11665; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11666; GFX90A-TGSPLIT-NEXT: buffer_invl2 11667; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 11668; GFX90A-TGSPLIT-NEXT: s_endpgm 11669; 11670; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw: 11671; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11672; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11673; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11674; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11675; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11676; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11677; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 11678; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11679; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 11680; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11681; 11682; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw: 11683; GFX940-TGSPLIT: ; %bb.0: ; %entry 11684; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11685; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11686; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11687; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11688; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11689; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 11690; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11691; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 11692; GFX940-TGSPLIT-NEXT: s_endpgm 11693; 11694; GFX11-WGP-LABEL: global_system_one_as_acquire_atomicrmw: 11695; GFX11-WGP: ; %bb.0: ; %entry 11696; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11697; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 
11698; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11699; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11700; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 11701; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11702; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 11703; GFX11-WGP-NEXT: buffer_gl1_inv 11704; GFX11-WGP-NEXT: buffer_gl0_inv 11705; GFX11-WGP-NEXT: s_endpgm 11706; 11707; GFX11-CU-LABEL: global_system_one_as_acquire_atomicrmw: 11708; GFX11-CU: ; %bb.0: ; %entry 11709; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11710; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11711; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11712; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11713; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 11714; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11715; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 11716; GFX11-CU-NEXT: buffer_gl1_inv 11717; GFX11-CU-NEXT: buffer_gl0_inv 11718; GFX11-CU-NEXT: s_endpgm 11719; 11720; GFX12-WGP-LABEL: global_system_one_as_acquire_atomicrmw: 11721; GFX12-WGP: ; %bb.0: ; %entry 11722; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11723; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11724; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11725; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11726; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 11727; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 11728; GFX12-WGP-NEXT: s_wait_storecnt 0x0 11729; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 11730; GFX12-WGP-NEXT: s_endpgm 11731; 11732; GFX12-CU-LABEL: global_system_one_as_acquire_atomicrmw: 11733; GFX12-CU: ; %bb.0: ; %entry 11734; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11735; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11736; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11737; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11738; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 11739; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 11740; GFX12-CU-NEXT: s_wait_storecnt 0x0 11741; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 11742; GFX12-CU-NEXT: s_endpgm 11743 ptr addrspace(1) 
%out, i32 %in) { 11744entry: 11745 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acquire 11746 ret void 11747} 11748 11749define amdgpu_kernel void @global_system_one_as_release_atomicrmw( 11750; GFX6-LABEL: global_system_one_as_release_atomicrmw: 11751; GFX6: ; %bb.0: ; %entry 11752; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11753; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 11754; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11755; GFX6-NEXT: s_mov_b32 s11, s5 11756; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11757; GFX6-NEXT: s_mov_b32 s9, 0x100f000 11758; GFX6-NEXT: s_mov_b32 s10, -1 11759; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11760; GFX6-NEXT: s_mov_b32 s5, s11 11761; GFX6-NEXT: s_mov_b32 s6, s10 11762; GFX6-NEXT: s_mov_b32 s7, s9 11763; GFX6-NEXT: v_mov_b32_e32 v0, s8 11764; GFX6-NEXT: s_waitcnt vmcnt(0) 11765; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 11766; GFX6-NEXT: s_endpgm 11767; 11768; GFX7-LABEL: global_system_one_as_release_atomicrmw: 11769; GFX7: ; %bb.0: ; %entry 11770; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11771; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 11772; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11773; GFX7-NEXT: v_mov_b32_e32 v0, s6 11774; GFX7-NEXT: v_mov_b32_e32 v1, s7 11775; GFX7-NEXT: v_mov_b32_e32 v2, s4 11776; GFX7-NEXT: s_waitcnt vmcnt(0) 11777; GFX7-NEXT: flat_atomic_swap v[0:1], v2 11778; GFX7-NEXT: s_endpgm 11779; 11780; GFX10-WGP-LABEL: global_system_one_as_release_atomicrmw: 11781; GFX10-WGP: ; %bb.0: ; %entry 11782; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11783; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11784; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 11785; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11786; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 11787; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 11788; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 11789; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 11790; GFX10-WGP-NEXT: s_endpgm 11791; 11792; 
GFX10-CU-LABEL: global_system_one_as_release_atomicrmw: 11793; GFX10-CU: ; %bb.0: ; %entry 11794; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11795; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11796; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 11797; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11798; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 11799; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 11800; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 11801; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 11802; GFX10-CU-NEXT: s_endpgm 11803; 11804; SKIP-CACHE-INV-LABEL: global_system_one_as_release_atomicrmw: 11805; SKIP-CACHE-INV: ; %bb.0: ; %entry 11806; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11807; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 11808; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11809; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 11810; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11811; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 11812; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 11813; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11814; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 11815; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 11816; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 11817; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 11818; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 11819; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 11820; SKIP-CACHE-INV-NEXT: s_endpgm 11821; 11822; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_atomicrmw: 11823; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11824; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11825; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11826; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11827; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11828; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11829; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 11830; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11831; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 
11832; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11833; 11834; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_atomicrmw: 11835; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11836; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11837; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11838; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11839; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11840; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11841; GFX90A-TGSPLIT-NEXT: buffer_wbl2 11842; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11843; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11844; GFX90A-TGSPLIT-NEXT: s_endpgm 11845; 11846; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_atomicrmw: 11847; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11848; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11849; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11850; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11851; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11852; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11853; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 11854; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11855; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 11856; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11857; 11858; GFX940-TGSPLIT-LABEL: global_system_one_as_release_atomicrmw: 11859; GFX940-TGSPLIT: ; %bb.0: ; %entry 11860; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11861; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11862; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11863; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11864; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11865; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 11866; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11867; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 11868; GFX940-TGSPLIT-NEXT: s_endpgm 11869; 11870; GFX11-WGP-LABEL: global_system_one_as_release_atomicrmw: 11871; GFX11-WGP: ; %bb.0: ; %entry 11872; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11873; GFX11-WGP-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x0 11874; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11875; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11876; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 11877; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 11878; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 11879; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11880; GFX11-WGP-NEXT: s_endpgm 11881; 11882; GFX11-CU-LABEL: global_system_one_as_release_atomicrmw: 11883; GFX11-CU: ; %bb.0: ; %entry 11884; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11885; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11886; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11887; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11888; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 11889; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 11890; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 11891; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11892; GFX11-CU-NEXT: s_endpgm 11893; 11894; GFX12-WGP-LABEL: global_system_one_as_release_atomicrmw: 11895; GFX12-WGP: ; %bb.0: ; %entry 11896; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11897; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11898; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11899; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11900; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 11901; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 11902; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 11903; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 11904; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 11905; GFX12-WGP-NEXT: s_wait_storecnt 0x0 11906; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 11907; GFX12-WGP-NEXT: s_endpgm 11908; 11909; GFX12-CU-LABEL: global_system_one_as_release_atomicrmw: 11910; GFX12-CU: ; %bb.0: ; %entry 11911; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11912; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11913; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11914; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11915; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 11916; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 11917; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 11918; GFX12-CU-NEXT: s_wait_samplecnt 0x0 11919; GFX12-CU-NEXT: 
s_wait_loadcnt 0x0 11920; GFX12-CU-NEXT: s_wait_storecnt 0x0 11921; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 11922; GFX12-CU-NEXT: s_endpgm 11923 ptr addrspace(1) %out, i32 %in) { 11924entry: 11925 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") release 11926 ret void 11927} 11928; Volatile xchg atomicrmw on ptr addrspace(1) with syncscope("one-as") and acq_rel ordering; the result %val is unused. The checks expect waits before the swap (preceded by buffer_wbl2 or global_wb in some check blocks) and, after it, another wait followed by cache-invalidate instructions, which are absent only in the SKIP-CACHE-INV block. Autogenerated checks; regenerate with utils/update_llc_test_checks.py instead of editing by hand. 11929define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( 11930; GFX6-LABEL: global_system_one_as_acq_rel_atomicrmw: 11931; GFX6: ; %bb.0: ; %entry 11932; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11933; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 11934; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11935; GFX6-NEXT: s_mov_b32 s11, s5 11936; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11937; GFX6-NEXT: s_mov_b32 s9, 0x100f000 11938; GFX6-NEXT: s_mov_b32 s10, -1 11939; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11940; GFX6-NEXT: s_mov_b32 s5, s11 11941; GFX6-NEXT: s_mov_b32 s6, s10 11942; GFX6-NEXT: s_mov_b32 s7, s9 11943; GFX6-NEXT: v_mov_b32_e32 v0, s8 11944; GFX6-NEXT: s_waitcnt vmcnt(0) 11945; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 11946; GFX6-NEXT: s_waitcnt vmcnt(0) 11947; GFX6-NEXT: buffer_wbinvl1 11948; GFX6-NEXT: s_endpgm 11949; 11950; GFX7-LABEL: global_system_one_as_acq_rel_atomicrmw: 11951; GFX7: ; %bb.0: ; %entry 11952; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11953; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 11954; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11955; GFX7-NEXT: v_mov_b32_e32 v0, s6 11956; GFX7-NEXT: v_mov_b32_e32 v1, s7 11957; GFX7-NEXT: v_mov_b32_e32 v2, s4 11958; GFX7-NEXT: s_waitcnt vmcnt(0) 11959; GFX7-NEXT: flat_atomic_swap v[0:1], v2 11960; GFX7-NEXT: s_waitcnt vmcnt(0) 11961; GFX7-NEXT: buffer_wbinvl1_vol 11962; GFX7-NEXT: s_endpgm 11963; 11964; GFX10-WGP-LABEL: global_system_one_as_acq_rel_atomicrmw: 11965; GFX10-WGP: ; %bb.0: ; %entry 11966; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11967; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11968; 
GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 11969; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11970; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 11971; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 11972; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 11973; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 11974; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 11975; GFX10-WGP-NEXT: buffer_gl1_inv 11976; GFX10-WGP-NEXT: buffer_gl0_inv 11977; GFX10-WGP-NEXT: s_endpgm 11978; 11979; GFX10-CU-LABEL: global_system_one_as_acq_rel_atomicrmw: 11980; GFX10-CU: ; %bb.0: ; %entry 11981; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11982; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11983; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 11984; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11985; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 11986; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 11987; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 11988; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 11989; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 11990; GFX10-CU-NEXT: buffer_gl1_inv 11991; GFX10-CU-NEXT: buffer_gl0_inv 11992; GFX10-CU-NEXT: s_endpgm 11993; 11994; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_atomicrmw: 11995; SKIP-CACHE-INV: ; %bb.0: ; %entry 11996; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11997; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 11998; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11999; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 12000; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12001; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 12002; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 12003; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12004; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 12005; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 12006; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 12007; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 12008; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12009; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 12010; SKIP-CACHE-INV-NEXT: 
s_waitcnt vmcnt(0) 12011; SKIP-CACHE-INV-NEXT: s_endpgm 12012; 12013; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw: 12014; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12015; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12016; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12017; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12018; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12019; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12020; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 12021; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12022; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 12023; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12024; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 12025; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 12026; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12027; 12028; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw: 12029; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12030; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12031; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12032; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12033; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12034; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12035; GFX90A-TGSPLIT-NEXT: buffer_wbl2 12036; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12037; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 12038; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12039; GFX90A-TGSPLIT-NEXT: buffer_invl2 12040; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 12041; GFX90A-TGSPLIT-NEXT: s_endpgm 12042; 12043; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw: 12044; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12045; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12046; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12047; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12048; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12049; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12050; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 12051; GFX940-NOTTGSPLIT-NEXT: 
s_waitcnt vmcnt(0) 12052; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 12053; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12054; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 12055; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12056; 12057; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw: 12058; GFX940-TGSPLIT: ; %bb.0: ; %entry 12059; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12060; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12061; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12062; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12063; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12064; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 12065; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12066; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 12067; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12068; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 12069; GFX940-TGSPLIT-NEXT: s_endpgm 12070; 12071; GFX11-WGP-LABEL: global_system_one_as_acq_rel_atomicrmw: 12072; GFX11-WGP: ; %bb.0: ; %entry 12073; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12074; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12075; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12076; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12077; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 12078; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12079; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12080; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 12081; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12082; GFX11-WGP-NEXT: buffer_gl1_inv 12083; GFX11-WGP-NEXT: buffer_gl0_inv 12084; GFX11-WGP-NEXT: s_endpgm 12085; 12086; GFX11-CU-LABEL: global_system_one_as_acq_rel_atomicrmw: 12087; GFX11-CU: ; %bb.0: ; %entry 12088; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12089; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12090; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12091; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12092; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 12093; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12094; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 12095; GFX11-CU-NEXT: 
global_atomic_swap_b32 v0, v1, s[0:1] 12096; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 12097; GFX11-CU-NEXT: buffer_gl1_inv 12098; GFX11-CU-NEXT: buffer_gl0_inv 12099; GFX11-CU-NEXT: s_endpgm 12100; 12101; GFX12-WGP-LABEL: global_system_one_as_acq_rel_atomicrmw: 12102; GFX12-WGP: ; %bb.0: ; %entry 12103; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12104; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12105; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12106; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12107; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 12108; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 12109; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 12110; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 12111; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12112; GFX12-WGP-NEXT: s_wait_storecnt 0x0 12113; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 12114; GFX12-WGP-NEXT: s_wait_storecnt 0x0 12115; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 12116; GFX12-WGP-NEXT: s_endpgm 12117; 12118; GFX12-CU-LABEL: global_system_one_as_acq_rel_atomicrmw: 12119; GFX12-CU: ; %bb.0: ; %entry 12120; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12121; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12122; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12123; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12124; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 12125; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 12126; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 12127; GFX12-CU-NEXT: s_wait_samplecnt 0x0 12128; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12129; GFX12-CU-NEXT: s_wait_storecnt 0x0 12130; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 12131; GFX12-CU-NEXT: s_wait_storecnt 0x0 12132; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 12133; GFX12-CU-NEXT: s_endpgm 12134 ptr addrspace(1) %out, i32 %in) { 12135entry: 12136 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acq_rel 12137 ret void 12138} 12139; Volatile xchg atomicrmw on ptr addrspace(1) with syncscope("one-as") and seq_cst ordering; the result %val is unused. The expected instruction sequences appear to match the acq_rel variant of this test (waits before the swap, then a wait and cache invalidation after it). Autogenerated checks; regenerate with utils/update_llc_test_checks.py instead of editing by hand. 12140define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw( 12141; GFX6-LABEL: global_system_one_as_seq_cst_atomicrmw: 
12142; GFX6: ; %bb.0: ; %entry 12143; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12144; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 12145; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12146; GFX6-NEXT: s_mov_b32 s11, s5 12147; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12148; GFX6-NEXT: s_mov_b32 s9, 0x100f000 12149; GFX6-NEXT: s_mov_b32 s10, -1 12150; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12151; GFX6-NEXT: s_mov_b32 s5, s11 12152; GFX6-NEXT: s_mov_b32 s6, s10 12153; GFX6-NEXT: s_mov_b32 s7, s9 12154; GFX6-NEXT: v_mov_b32_e32 v0, s8 12155; GFX6-NEXT: s_waitcnt vmcnt(0) 12156; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 12157; GFX6-NEXT: s_waitcnt vmcnt(0) 12158; GFX6-NEXT: buffer_wbinvl1 12159; GFX6-NEXT: s_endpgm 12160; 12161; GFX7-LABEL: global_system_one_as_seq_cst_atomicrmw: 12162; GFX7: ; %bb.0: ; %entry 12163; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12164; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 12165; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12166; GFX7-NEXT: v_mov_b32_e32 v0, s6 12167; GFX7-NEXT: v_mov_b32_e32 v1, s7 12168; GFX7-NEXT: v_mov_b32_e32 v2, s4 12169; GFX7-NEXT: s_waitcnt vmcnt(0) 12170; GFX7-NEXT: flat_atomic_swap v[0:1], v2 12171; GFX7-NEXT: s_waitcnt vmcnt(0) 12172; GFX7-NEXT: buffer_wbinvl1_vol 12173; GFX7-NEXT: s_endpgm 12174; 12175; GFX10-WGP-LABEL: global_system_one_as_seq_cst_atomicrmw: 12176; GFX10-WGP: ; %bb.0: ; %entry 12177; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12178; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12179; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 12180; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12181; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 12182; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12183; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12184; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 12185; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12186; GFX10-WGP-NEXT: buffer_gl1_inv 12187; GFX10-WGP-NEXT: buffer_gl0_inv 12188; GFX10-WGP-NEXT: s_endpgm 12189; 12190; GFX10-CU-LABEL: 
global_system_one_as_seq_cst_atomicrmw: 12191; GFX10-CU: ; %bb.0: ; %entry 12192; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12193; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12194; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 12195; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12196; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 12197; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12198; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 12199; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 12200; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 12201; GFX10-CU-NEXT: buffer_gl1_inv 12202; GFX10-CU-NEXT: buffer_gl0_inv 12203; GFX10-CU-NEXT: s_endpgm 12204; 12205; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_atomicrmw: 12206; SKIP-CACHE-INV: ; %bb.0: ; %entry 12207; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12208; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 12209; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12210; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 12211; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12212; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 12213; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 12214; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12215; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 12216; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 12217; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 12218; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 12219; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12220; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 12221; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12222; SKIP-CACHE-INV-NEXT: s_endpgm 12223; 12224; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw: 12225; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12226; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12227; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12228; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12229; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12230; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12231; 
GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 12232; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12233; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 12234; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12235; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 12236; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 12237; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12238; 12239; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw: 12240; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12241; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12242; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12243; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12244; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12245; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12246; GFX90A-TGSPLIT-NEXT: buffer_wbl2 12247; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12248; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 12249; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12250; GFX90A-TGSPLIT-NEXT: buffer_invl2 12251; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 12252; GFX90A-TGSPLIT-NEXT: s_endpgm 12253; 12254; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw: 12255; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12256; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12257; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12258; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12259; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12260; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12261; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 12262; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12263; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 12264; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12265; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 12266; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12267; 12268; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw: 12269; GFX940-TGSPLIT: ; %bb.0: ; %entry 12270; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12271; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12272; 
GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12273; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12274; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12275; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 12276; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12277; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 12278; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12279; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 12280; GFX940-TGSPLIT-NEXT: s_endpgm 12281; 12282; GFX11-WGP-LABEL: global_system_one_as_seq_cst_atomicrmw: 12283; GFX11-WGP: ; %bb.0: ; %entry 12284; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12285; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12286; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12287; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12288; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 12289; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12290; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12291; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 12292; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12293; GFX11-WGP-NEXT: buffer_gl1_inv 12294; GFX11-WGP-NEXT: buffer_gl0_inv 12295; GFX11-WGP-NEXT: s_endpgm 12296; 12297; GFX11-CU-LABEL: global_system_one_as_seq_cst_atomicrmw: 12298; GFX11-CU: ; %bb.0: ; %entry 12299; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12300; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12301; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12302; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12303; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 12304; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12305; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 12306; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 12307; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 12308; GFX11-CU-NEXT: buffer_gl1_inv 12309; GFX11-CU-NEXT: buffer_gl0_inv 12310; GFX11-CU-NEXT: s_endpgm 12311; 12312; GFX12-WGP-LABEL: global_system_one_as_seq_cst_atomicrmw: 12313; GFX12-WGP: ; %bb.0: ; %entry 12314; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12315; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12316; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12317; 
GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12318; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 12319; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 12320; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 12321; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 12322; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12323; GFX12-WGP-NEXT: s_wait_storecnt 0x0 12324; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 12325; GFX12-WGP-NEXT: s_wait_storecnt 0x0 12326; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 12327; GFX12-WGP-NEXT: s_endpgm 12328; 12329; GFX12-CU-LABEL: global_system_one_as_seq_cst_atomicrmw: 12330; GFX12-CU: ; %bb.0: ; %entry 12331; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12332; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12333; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12334; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12335; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 12336; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 12337; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 12338; GFX12-CU-NEXT: s_wait_samplecnt 0x0 12339; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12340; GFX12-CU-NEXT: s_wait_storecnt 0x0 12341; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS 12342; GFX12-CU-NEXT: s_wait_storecnt 0x0 12343; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 12344; GFX12-CU-NEXT: s_endpgm 12345 ptr addrspace(1) %out, i32 %in) { 12346entry: 12347 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") seq_cst 12348 ret void 12349} 12350; Returning acquire case. Volatile atomicrmw xchg on addrspace(1) %out with syncscope("one-as") and acquire ordering, whose result %val is then stored back to %out. 12351define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( 12352; GFX6-LABEL: global_system_one_as_acquire_ret_atomicrmw: 12353; GFX6: ; %bb.0: ; %entry 12354; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12355; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 12356; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12357; GFX6-NEXT: s_mov_b32 s11, s5 12358; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12359; GFX6-NEXT: s_mov_b32 s9, 0x100f000 12360; GFX6-NEXT: s_mov_b32 s10, -1 12361; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12362; 
GFX6-NEXT: s_mov_b32 s5, s11 12363; GFX6-NEXT: s_mov_b32 s6, s10 12364; GFX6-NEXT: s_mov_b32 s7, s9 12365; GFX6-NEXT: v_mov_b32_e32 v0, s8 12366; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 12367; GFX6-NEXT: s_waitcnt vmcnt(0) 12368; GFX6-NEXT: buffer_wbinvl1 12369; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 12370; GFX6-NEXT: s_endpgm 12371; 12372; GFX7-LABEL: global_system_one_as_acquire_ret_atomicrmw: 12373; GFX7: ; %bb.0: ; %entry 12374; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12375; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 12376; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12377; GFX7-NEXT: v_mov_b32_e32 v0, s4 12378; GFX7-NEXT: v_mov_b32_e32 v1, s5 12379; GFX7-NEXT: v_mov_b32_e32 v2, s6 12380; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 12381; GFX7-NEXT: s_waitcnt vmcnt(0) 12382; GFX7-NEXT: buffer_wbinvl1_vol 12383; GFX7-NEXT: v_mov_b32_e32 v0, s4 12384; GFX7-NEXT: v_mov_b32_e32 v1, s5 12385; GFX7-NEXT: flat_store_dword v[0:1], v2 12386; GFX7-NEXT: s_endpgm 12387; 12388; GFX10-WGP-LABEL: global_system_one_as_acquire_ret_atomicrmw: 12389; GFX10-WGP: ; %bb.0: ; %entry 12390; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12391; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12392; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 12393; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12394; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 12395; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12396; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12397; GFX10-WGP-NEXT: buffer_gl1_inv 12398; GFX10-WGP-NEXT: buffer_gl0_inv 12399; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 12400; GFX10-WGP-NEXT: s_endpgm 12401; 12402; GFX10-CU-LABEL: global_system_one_as_acquire_ret_atomicrmw: 12403; GFX10-CU: ; %bb.0: ; %entry 12404; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12405; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12406; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 12407; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12408; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 12409; GFX10-CU-NEXT: global_atomic_swap 
v1, v0, v1, s[4:5] glc 12410; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12411; GFX10-CU-NEXT: buffer_gl1_inv 12412; GFX10-CU-NEXT: buffer_gl0_inv 12413; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 12414; GFX10-CU-NEXT: s_endpgm 12415; 12416; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_ret_atomicrmw: 12417; SKIP-CACHE-INV: ; %bb.0: ; %entry 12418; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12419; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 12420; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12421; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 12422; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12423; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 12424; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 12425; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12426; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 12427; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 12428; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 12429; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 12430; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 12431; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12432; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 12433; SKIP-CACHE-INV-NEXT: s_endpgm 12434; 12435; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_ret_atomicrmw: 12436; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12437; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12438; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12439; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12440; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12441; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12442; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12443; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12444; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 12445; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 12446; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12447; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12448; 12449; GFX90A-TGSPLIT-LABEL: 
global_system_one_as_acquire_ret_atomicrmw: 12450; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12451; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12452; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12453; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12454; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12455; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12456; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12457; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12458; GFX90A-TGSPLIT-NEXT: buffer_invl2 12459; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 12460; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12461; GFX90A-TGSPLIT-NEXT: s_endpgm 12462; 12463; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_ret_atomicrmw: 12464; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12465; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12466; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12467; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12468; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12469; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12470; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 12471; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12472; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 12473; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12474; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12475; 12476; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_ret_atomicrmw: 12477; GFX940-TGSPLIT: ; %bb.0: ; %entry 12478; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12479; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12480; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12481; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12482; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12483; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 12484; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12485; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 12486; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12487; 
GFX940-TGSPLIT-NEXT: s_endpgm 12488; 12489; GFX11-WGP-LABEL: global_system_one_as_acquire_ret_atomicrmw: 12490; GFX11-WGP: ; %bb.0: ; %entry 12491; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12492; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12493; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12494; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12495; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 12496; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 12497; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12498; GFX11-WGP-NEXT: buffer_gl1_inv 12499; GFX11-WGP-NEXT: buffer_gl0_inv 12500; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12501; GFX11-WGP-NEXT: s_endpgm 12502; 12503; GFX11-CU-LABEL: global_system_one_as_acquire_ret_atomicrmw: 12504; GFX11-CU: ; %bb.0: ; %entry 12505; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12506; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12507; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12508; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12509; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 12510; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 12511; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12512; GFX11-CU-NEXT: buffer_gl1_inv 12513; GFX11-CU-NEXT: buffer_gl0_inv 12514; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12515; GFX11-CU-NEXT: s_endpgm 12516; 12517; GFX12-WGP-LABEL: global_system_one_as_acquire_ret_atomicrmw: 12518; GFX12-WGP: ; %bb.0: ; %entry 12519; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12520; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12521; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12522; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12523; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 12524; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS 12525; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12526; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 12527; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12528; GFX12-WGP-NEXT: s_endpgm 12529; 12530; GFX12-CU-LABEL: global_system_one_as_acquire_ret_atomicrmw: 12531; GFX12-CU: ; %bb.0: ; %entry 12532; 
GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12533; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12534; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12535; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12536; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 12537; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS 12538; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12539; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 12540; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12541; GFX12-CU-NEXT: s_endpgm 12542 ptr addrspace(1) %out, i32 %in) { 12543entry: 12544 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acquire 12545 store i32 %val, ptr addrspace(1) %out, align 4 12546 ret void 12547} 12548; Returning acq_rel case. Volatile atomicrmw xchg on addrspace(1) %out with syncscope("one-as") and acq_rel ordering, whose result %val is then stored back to %out. 12549define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( 12550; GFX6-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 12551; GFX6: ; %bb.0: ; %entry 12552; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12553; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 12554; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12555; GFX6-NEXT: s_mov_b32 s11, s5 12556; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12557; GFX6-NEXT: s_mov_b32 s9, 0x100f000 12558; GFX6-NEXT: s_mov_b32 s10, -1 12559; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12560; GFX6-NEXT: s_mov_b32 s5, s11 12561; GFX6-NEXT: s_mov_b32 s6, s10 12562; GFX6-NEXT: s_mov_b32 s7, s9 12563; GFX6-NEXT: v_mov_b32_e32 v0, s8 12564; GFX6-NEXT: s_waitcnt vmcnt(0) 12565; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 12566; GFX6-NEXT: s_waitcnt vmcnt(0) 12567; GFX6-NEXT: buffer_wbinvl1 12568; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 12569; GFX6-NEXT: s_endpgm 12570; 12571; GFX7-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 12572; GFX7: ; %bb.0: ; %entry 12573; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12574; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 12575; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12576; GFX7-NEXT: v_mov_b32_e32 v0, s4 12577; GFX7-NEXT: 
v_mov_b32_e32 v1, s5 12578; GFX7-NEXT: v_mov_b32_e32 v2, s6 12579; GFX7-NEXT: s_waitcnt vmcnt(0) 12580; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 12581; GFX7-NEXT: s_waitcnt vmcnt(0) 12582; GFX7-NEXT: buffer_wbinvl1_vol 12583; GFX7-NEXT: v_mov_b32_e32 v0, s4 12584; GFX7-NEXT: v_mov_b32_e32 v1, s5 12585; GFX7-NEXT: flat_store_dword v[0:1], v2 12586; GFX7-NEXT: s_endpgm 12587; 12588; GFX10-WGP-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 12589; GFX10-WGP: ; %bb.0: ; %entry 12590; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12591; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12592; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 12593; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12594; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 12595; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12596; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12597; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12598; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12599; GFX10-WGP-NEXT: buffer_gl1_inv 12600; GFX10-WGP-NEXT: buffer_gl0_inv 12601; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 12602; GFX10-WGP-NEXT: s_endpgm 12603; 12604; GFX10-CU-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 12605; GFX10-CU: ; %bb.0: ; %entry 12606; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12607; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12608; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 12609; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12610; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 12611; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12612; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 12613; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12614; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12615; GFX10-CU-NEXT: buffer_gl1_inv 12616; GFX10-CU-NEXT: buffer_gl0_inv 12617; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 12618; GFX10-CU-NEXT: s_endpgm 12619; 12620; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 12621; SKIP-CACHE-INV: ; %bb.0: ; %entry 12622; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12623; 
SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 12624; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12625; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 12626; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12627; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 12628; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 12629; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12630; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 12631; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 12632; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 12633; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 12634; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12635; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 12636; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12637; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 12638; SKIP-CACHE-INV-NEXT: s_endpgm 12639; 12640; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 12641; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12642; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12643; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12644; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12645; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12646; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12647; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 12648; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12649; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12650; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12651; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 12652; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 12653; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12654; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12655; 12656; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 12657; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12658; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12659; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12660; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12661; GFX90A-TGSPLIT-NEXT: 
s_waitcnt lgkmcnt(0) 12662; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12663; GFX90A-TGSPLIT-NEXT: buffer_wbl2 12664; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12665; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12666; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12667; GFX90A-TGSPLIT-NEXT: buffer_invl2 12668; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 12669; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12670; GFX90A-TGSPLIT-NEXT: s_endpgm 12671; 12672; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 12673; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12674; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12675; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12676; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12677; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12678; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12679; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 12680; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12681; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 12682; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12683; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 12684; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12685; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12686; 12687; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 12688; GFX940-TGSPLIT: ; %bb.0: ; %entry 12689; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12690; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12691; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12692; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12693; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12694; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 12695; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12696; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 12697; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12698; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 12699; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12700; 
GFX940-TGSPLIT-NEXT: s_endpgm 12701; 12702; GFX11-WGP-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 12703; GFX11-WGP: ; %bb.0: ; %entry 12704; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12705; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12706; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12707; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12708; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 12709; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12710; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12711; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 12712; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12713; GFX11-WGP-NEXT: buffer_gl1_inv 12714; GFX11-WGP-NEXT: buffer_gl0_inv 12715; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12716; GFX11-WGP-NEXT: s_endpgm 12717; 12718; GFX11-CU-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 12719; GFX11-CU: ; %bb.0: ; %entry 12720; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12721; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12722; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12723; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12724; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 12725; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12726; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 12727; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 12728; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12729; GFX11-CU-NEXT: buffer_gl1_inv 12730; GFX11-CU-NEXT: buffer_gl0_inv 12731; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12732; GFX11-CU-NEXT: s_endpgm 12733; 12734; GFX12-WGP-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 12735; GFX12-WGP: ; %bb.0: ; %entry 12736; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12737; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12738; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12739; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12740; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 12741; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 12742; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 12743; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 12744; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12745; GFX12-WGP-NEXT: s_wait_storecnt 0x0 
12746; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS 12747; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 12748; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 12749; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12750; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 12751; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12752; GFX12-WGP-NEXT: s_endpgm 12753; 12754; GFX12-CU-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 12755; GFX12-CU: ; %bb.0: ; %entry 12756; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12757; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12758; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12759; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12760; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 12761; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 12762; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 12763; GFX12-CU-NEXT: s_wait_samplecnt 0x0 12764; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12765; GFX12-CU-NEXT: s_wait_storecnt 0x0 12766; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS 12767; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 12768; GFX12-CU-NEXT: s_wait_samplecnt 0x0 12769; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12770; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 12771; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12772; GFX12-CU-NEXT: s_endpgm 12773 ptr addrspace(1) %out, i32 %in) { 12774entry: 12775 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acq_rel 12776 store i32 %val, ptr addrspace(1) %out, align 4 12777 ret void 12778} 12779; Returning seq_cst case. Volatile atomicrmw xchg on addrspace(1) %out with syncscope("one-as") and seq_cst ordering, whose result %val is then stored back to %out. 12780define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( 12781; GFX6-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 12782; GFX6: ; %bb.0: ; %entry 12783; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12784; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 12785; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12786; GFX6-NEXT: s_mov_b32 s11, s5 12787; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12788; GFX6-NEXT: s_mov_b32 s9, 0x100f000 12789; GFX6-NEXT: s_mov_b32 s10, -1 
12790; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12791; GFX6-NEXT: s_mov_b32 s5, s11 12792; GFX6-NEXT: s_mov_b32 s6, s10 12793; GFX6-NEXT: s_mov_b32 s7, s9 12794; GFX6-NEXT: v_mov_b32_e32 v0, s8 12795; GFX6-NEXT: s_waitcnt vmcnt(0) 12796; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 12797; GFX6-NEXT: s_waitcnt vmcnt(0) 12798; GFX6-NEXT: buffer_wbinvl1 12799; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 12800; GFX6-NEXT: s_endpgm 12801; 12802; GFX7-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 12803; GFX7: ; %bb.0: ; %entry 12804; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12805; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 12806; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12807; GFX7-NEXT: v_mov_b32_e32 v0, s4 12808; GFX7-NEXT: v_mov_b32_e32 v1, s5 12809; GFX7-NEXT: v_mov_b32_e32 v2, s6 12810; GFX7-NEXT: s_waitcnt vmcnt(0) 12811; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 12812; GFX7-NEXT: s_waitcnt vmcnt(0) 12813; GFX7-NEXT: buffer_wbinvl1_vol 12814; GFX7-NEXT: v_mov_b32_e32 v0, s4 12815; GFX7-NEXT: v_mov_b32_e32 v1, s5 12816; GFX7-NEXT: flat_store_dword v[0:1], v2 12817; GFX7-NEXT: s_endpgm 12818; 12819; GFX10-WGP-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 12820; GFX10-WGP: ; %bb.0: ; %entry 12821; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12822; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12823; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 12824; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12825; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 12826; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12827; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12828; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12829; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12830; GFX10-WGP-NEXT: buffer_gl1_inv 12831; GFX10-WGP-NEXT: buffer_gl0_inv 12832; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 12833; GFX10-WGP-NEXT: s_endpgm 12834; 12835; GFX10-CU-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 12836; GFX10-CU: ; %bb.0: ; %entry 12837; GFX10-CU-NEXT: 
v_mov_b32_e32 v0, 0 12838; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12839; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 12840; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12841; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 12842; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12843; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 12844; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12845; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12846; GFX10-CU-NEXT: buffer_gl1_inv 12847; GFX10-CU-NEXT: buffer_gl0_inv 12848; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 12849; GFX10-CU-NEXT: s_endpgm 12850; 12851; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 12852; SKIP-CACHE-INV: ; %bb.0: ; %entry 12853; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12854; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 12855; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12856; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 12857; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12858; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 12859; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 12860; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12861; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 12862; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 12863; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 12864; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 12865; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12866; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 12867; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12868; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 12869; SKIP-CACHE-INV-NEXT: s_endpgm 12870; 12871; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 12872; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12873; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12874; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12875; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12876; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12877; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12878; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 12879; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12880; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12881; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12882; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 12883; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 12884; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12885; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12886; 12887; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 12888; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12889; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12890; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12891; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12892; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12893; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12894; GFX90A-TGSPLIT-NEXT: buffer_wbl2 12895; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12896; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12897; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12898; GFX90A-TGSPLIT-NEXT: buffer_invl2 12899; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 12900; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12901; GFX90A-TGSPLIT-NEXT: s_endpgm 12902; 12903; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 12904; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12905; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12906; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12907; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12908; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12909; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12910; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 12911; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12912; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 12913; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12914; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 12915; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 
12916; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12917; 12918; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 12919; GFX940-TGSPLIT: ; %bb.0: ; %entry 12920; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12921; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12922; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12923; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12924; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12925; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 12926; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12927; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 12928; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12929; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 12930; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12931; GFX940-TGSPLIT-NEXT: s_endpgm 12932; 12933; GFX11-WGP-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 12934; GFX11-WGP: ; %bb.0: ; %entry 12935; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12936; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12937; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12938; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12939; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 12940; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12941; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12942; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 12943; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12944; GFX11-WGP-NEXT: buffer_gl1_inv 12945; GFX11-WGP-NEXT: buffer_gl0_inv 12946; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12947; GFX11-WGP-NEXT: s_endpgm 12948; 12949; GFX11-CU-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 12950; GFX11-CU: ; %bb.0: ; %entry 12951; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12952; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12953; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12954; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12955; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 12956; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12957; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 12958; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] 
glc 12959; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12960; GFX11-CU-NEXT: buffer_gl1_inv 12961; GFX11-CU-NEXT: buffer_gl0_inv 12962; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12963; GFX11-CU-NEXT: s_endpgm 12964; 12965; GFX12-WGP-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 12966; GFX12-WGP: ; %bb.0: ; %entry 12967; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12968; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12969; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12970; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12971; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 12972; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 12973; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 12974; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 12975; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12976; GFX12-WGP-NEXT: s_wait_storecnt 0x0 12977; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS 12978; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 12979; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 12980; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12981; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 12982; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12983; GFX12-WGP-NEXT: s_endpgm 12984; 12985; GFX12-CU-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 12986; GFX12-CU: ; %bb.0: ; %entry 12987; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12988; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12989; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12990; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12991; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 12992; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 12993; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 12994; GFX12-CU-NEXT: s_wait_samplecnt 0x0 12995; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12996; GFX12-CU-NEXT: s_wait_storecnt 0x0 12997; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS 12998; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 12999; GFX12-CU-NEXT: s_wait_samplecnt 0x0 13000; GFX12-CU-NEXT: s_wait_loadcnt 0x0 13001; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 13002; GFX12-CU-NEXT: global_store_b32 v0, v1, 
s[0:1] 13003; GFX12-CU-NEXT: s_endpgm 13004 ptr addrspace(1) %out, i32 %in) { 13005entry: 13006 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") seq_cst 13007 store i32 %val, ptr addrspace(1) %out, align 4 13008 ret void 13009} 13010 13011define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg( 13012; GFX6-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 13013; GFX6: ; %bb.0: ; %entry 13014; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 13015; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 13016; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 13017; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 13018; GFX6-NEXT: s_waitcnt lgkmcnt(0) 13019; GFX6-NEXT: s_mov_b32 s12, s5 13020; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13021; GFX6-NEXT: s_mov_b32 s10, 0x100f000 13022; GFX6-NEXT: s_mov_b32 s11, -1 13023; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13024; GFX6-NEXT: s_mov_b32 s5, s12 13025; GFX6-NEXT: s_mov_b32 s6, s11 13026; GFX6-NEXT: s_mov_b32 s7, s10 13027; GFX6-NEXT: v_mov_b32_e32 v0, s9 13028; GFX6-NEXT: v_mov_b32_e32 v2, s8 13029; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13030; GFX6-NEXT: v_mov_b32_e32 v1, v2 13031; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 13032; GFX6-NEXT: s_endpgm 13033; 13034; GFX7-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 13035; GFX7: ; %bb.0: ; %entry 13036; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 13037; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 13038; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 13039; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 13040; GFX7-NEXT: s_mov_b64 s[10:11], 16 13041; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13042; GFX7-NEXT: s_mov_b32 s4, s8 13043; GFX7-NEXT: s_mov_b32 s5, s9 13044; GFX7-NEXT: s_mov_b32 s9, s10 13045; GFX7-NEXT: s_mov_b32 s8, s11 13046; GFX7-NEXT: s_add_u32 s4, s4, s9 13047; GFX7-NEXT: s_addc_u32 s8, s5, s8 13048; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 
def $sgpr4_sgpr5 13049; GFX7-NEXT: s_mov_b32 s5, s8 13050; GFX7-NEXT: v_mov_b32_e32 v2, s7 13051; GFX7-NEXT: v_mov_b32_e32 v0, s6 13052; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13053; GFX7-NEXT: v_mov_b32_e32 v3, v0 13054; GFX7-NEXT: v_mov_b32_e32 v0, s4 13055; GFX7-NEXT: v_mov_b32_e32 v1, s5 13056; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 13057; GFX7-NEXT: s_endpgm 13058; 13059; GFX10-WGP-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 13060; GFX10-WGP: ; %bb.0: ; %entry 13061; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 13062; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13063; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 13064; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 13065; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13066; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 13067; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 13068; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13069; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 13070; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13071; GFX10-WGP-NEXT: s_endpgm 13072; 13073; GFX10-CU-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 13074; GFX10-CU: ; %bb.0: ; %entry 13075; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 13076; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13077; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 13078; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 13079; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13080; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 13081; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 13082; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13083; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 13084; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13085; GFX10-CU-NEXT: s_endpgm 13086; 13087; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 13088; SKIP-CACHE-INV: ; %bb.0: ; %entry 13089; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 13090; SKIP-CACHE-INV-NEXT: s_load_dwordx2 
s[0:1], s[2:3], 0x0 13091; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 13092; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 13093; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13094; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 13095; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 13096; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 13097; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 13098; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 13099; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 13100; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 13101; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 13102; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 13103; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 13104; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13105; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 13106; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 13107; SKIP-CACHE-INV-NEXT: s_endpgm 13108; 13109; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 13110; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13111; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13112; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13113; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13114; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13115; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13116; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13117; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13118; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13119; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13120; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13121; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13122; 13123; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 13124; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13125; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13126; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 
s[4:5], s[8:9], 0x0 13127; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13128; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13129; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13130; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13131; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13132; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13133; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13134; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13135; GFX90A-TGSPLIT-NEXT: s_endpgm 13136; 13137; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 13138; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13139; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13140; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13141; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13142; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13143; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13144; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13145; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13146; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13147; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13148; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 13149; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13150; 13151; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 13152; GFX940-TGSPLIT: ; %bb.0: ; %entry 13153; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13154; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13155; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13156; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13157; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13158; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13159; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13160; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13161; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13162; 
GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 13163; GFX940-TGSPLIT-NEXT: s_endpgm 13164; 13165; GFX11-WGP-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 13166; GFX11-WGP: ; %bb.0: ; %entry 13167; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13168; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13169; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13170; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13171; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13172; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 13173; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 13174; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13175; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 13176; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13177; GFX11-WGP-NEXT: s_endpgm 13178; 13179; GFX11-CU-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 13180; GFX11-CU: ; %bb.0: ; %entry 13181; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13182; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13183; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13184; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13185; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13186; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 13187; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 13188; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13189; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 13190; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13191; GFX11-CU-NEXT: s_endpgm 13192; 13193; GFX12-WGP-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 13194; GFX12-WGP: ; %bb.0: ; %entry 13195; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13196; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13197; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13198; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13199; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13200; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 13201; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 13202; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 
13203; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 13204; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 13205; GFX12-WGP-NEXT: s_endpgm 13206; 13207; GFX12-CU-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 13208; GFX12-CU: ; %bb.0: ; %entry 13209; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13210; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13211; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13212; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13213; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13214; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 13215; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 13216; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13217; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 13218; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 13219; GFX12-CU-NEXT: s_endpgm 13220 ptr addrspace(1) %out, i32 %in, i32 %old) { 13221entry: 13222 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 13223 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic 13224 ret void 13225} 13226 13227define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( 13228; GFX6-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 13229; GFX6: ; %bb.0: ; %entry 13230; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 13231; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 13232; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 13233; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 13234; GFX6-NEXT: s_waitcnt lgkmcnt(0) 13235; GFX6-NEXT: s_mov_b32 s12, s5 13236; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13237; GFX6-NEXT: s_mov_b32 s10, 0x100f000 13238; GFX6-NEXT: s_mov_b32 s11, -1 13239; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13240; GFX6-NEXT: s_mov_b32 s5, s12 13241; GFX6-NEXT: s_mov_b32 s6, s11 13242; GFX6-NEXT: s_mov_b32 s7, s10 13243; GFX6-NEXT: v_mov_b32_e32 v0, s9 13244; GFX6-NEXT: v_mov_b32_e32 v2, s8 13245; GFX6-NEXT: ; 
kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13246; GFX6-NEXT: v_mov_b32_e32 v1, v2 13247; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 13248; GFX6-NEXT: s_waitcnt vmcnt(0) 13249; GFX6-NEXT: buffer_wbinvl1 13250; GFX6-NEXT: s_endpgm 13251; 13252; GFX7-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 13253; GFX7: ; %bb.0: ; %entry 13254; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 13255; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 13256; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 13257; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 13258; GFX7-NEXT: s_mov_b64 s[10:11], 16 13259; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13260; GFX7-NEXT: s_mov_b32 s4, s8 13261; GFX7-NEXT: s_mov_b32 s5, s9 13262; GFX7-NEXT: s_mov_b32 s9, s10 13263; GFX7-NEXT: s_mov_b32 s8, s11 13264; GFX7-NEXT: s_add_u32 s4, s4, s9 13265; GFX7-NEXT: s_addc_u32 s8, s5, s8 13266; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 13267; GFX7-NEXT: s_mov_b32 s5, s8 13268; GFX7-NEXT: v_mov_b32_e32 v2, s7 13269; GFX7-NEXT: v_mov_b32_e32 v0, s6 13270; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13271; GFX7-NEXT: v_mov_b32_e32 v3, v0 13272; GFX7-NEXT: v_mov_b32_e32 v0, s4 13273; GFX7-NEXT: v_mov_b32_e32 v1, s5 13274; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 13275; GFX7-NEXT: s_waitcnt vmcnt(0) 13276; GFX7-NEXT: buffer_wbinvl1_vol 13277; GFX7-NEXT: s_endpgm 13278; 13279; GFX10-WGP-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 13280; GFX10-WGP: ; %bb.0: ; %entry 13281; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 13282; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13283; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 13284; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 13285; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13286; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 13287; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 13288; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13289; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 13290; 
GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13291; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13292; GFX10-WGP-NEXT: buffer_gl1_inv 13293; GFX10-WGP-NEXT: buffer_gl0_inv 13294; GFX10-WGP-NEXT: s_endpgm 13295; 13296; GFX10-CU-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 13297; GFX10-CU: ; %bb.0: ; %entry 13298; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 13299; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13300; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 13301; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 13302; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13303; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 13304; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 13305; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13306; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 13307; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13308; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13309; GFX10-CU-NEXT: buffer_gl1_inv 13310; GFX10-CU-NEXT: buffer_gl0_inv 13311; GFX10-CU-NEXT: s_endpgm 13312; 13313; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 13314; SKIP-CACHE-INV: ; %bb.0: ; %entry 13315; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 13316; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 13317; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 13318; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 13319; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13320; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 13321; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 13322; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 13323; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 13324; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 13325; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 13326; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 13327; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 13328; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 13329; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 13330; SKIP-CACHE-INV-NEXT: ; 
kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13331; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 13332; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 13333; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13334; SKIP-CACHE-INV-NEXT: s_endpgm 13335; 13336; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 13337; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13338; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13339; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13340; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13341; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13342; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13343; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13344; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13345; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13346; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13347; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13348; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13349; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 13350; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 13351; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13352; 13353; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 13354; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13355; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13356; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13357; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13358; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13359; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13360; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13361; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13362; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13363; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13364; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13365; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13366; 
GFX90A-TGSPLIT-NEXT: buffer_invl2 13367; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 13368; GFX90A-TGSPLIT-NEXT: s_endpgm 13369; 13370; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 13371; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13372; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13373; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13374; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13375; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13376; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13377; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13378; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13379; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13380; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13381; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 13382; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13383; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 13384; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13385; 13386; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 13387; GFX940-TGSPLIT: ; %bb.0: ; %entry 13388; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13389; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13390; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13391; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13392; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13393; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13394; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13395; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13396; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13397; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 13398; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13399; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 13400; GFX940-TGSPLIT-NEXT: s_endpgm 13401; 13402; GFX11-WGP-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 13403; GFX11-WGP: ; %bb.0: ; %entry 13404; 
GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13405; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13406; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13407; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13408; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13409; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 13410; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 13411; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13412; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 13413; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13414; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13415; GFX11-WGP-NEXT: buffer_gl1_inv 13416; GFX11-WGP-NEXT: buffer_gl0_inv 13417; GFX11-WGP-NEXT: s_endpgm 13418; 13419; GFX11-CU-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 13420; GFX11-CU: ; %bb.0: ; %entry 13421; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13422; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13423; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13424; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13425; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13426; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 13427; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 13428; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13429; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 13430; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13431; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13432; GFX11-CU-NEXT: buffer_gl1_inv 13433; GFX11-CU-NEXT: buffer_gl0_inv 13434; GFX11-CU-NEXT: s_endpgm 13435; 13436; GFX12-WGP-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 13437; GFX12-WGP: ; %bb.0: ; %entry 13438; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13439; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13440; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13441; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13442; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13443; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 13444; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 13445; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed 
$exec 13446; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 13447; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 13448; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13449; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 13450; GFX12-WGP-NEXT: s_endpgm 13451; 13452; GFX12-CU-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 13453; GFX12-CU: ; %bb.0: ; %entry 13454; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13455; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13456; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13457; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13458; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13459; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 13460; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 13461; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13462; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 13463; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 13464; GFX12-CU-NEXT: s_wait_storecnt 0x0 13465; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 13466; GFX12-CU-NEXT: s_endpgm 13467 ptr addrspace(1) %out, i32 %in, i32 %old) { 13468entry: 13469 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 13470 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic 13471 ret void 13472} 13473 13474define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg( 13475; GFX6-LABEL: global_system_one_as_release_monotonic_cmpxchg: 13476; GFX6: ; %bb.0: ; %entry 13477; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 13478; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 13479; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 13480; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 13481; GFX6-NEXT: s_waitcnt lgkmcnt(0) 13482; GFX6-NEXT: s_mov_b32 s12, s5 13483; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13484; GFX6-NEXT: s_mov_b32 s10, 0x100f000 13485; GFX6-NEXT: s_mov_b32 s11, -1 13486; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13487; GFX6-NEXT: 
s_mov_b32 s5, s12 13488; GFX6-NEXT: s_mov_b32 s6, s11 13489; GFX6-NEXT: s_mov_b32 s7, s10 13490; GFX6-NEXT: v_mov_b32_e32 v0, s9 13491; GFX6-NEXT: v_mov_b32_e32 v2, s8 13492; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13493; GFX6-NEXT: v_mov_b32_e32 v1, v2 13494; GFX6-NEXT: s_waitcnt vmcnt(0) 13495; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 13496; GFX6-NEXT: s_endpgm 13497; 13498; GFX7-LABEL: global_system_one_as_release_monotonic_cmpxchg: 13499; GFX7: ; %bb.0: ; %entry 13500; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 13501; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 13502; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 13503; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 13504; GFX7-NEXT: s_mov_b64 s[10:11], 16 13505; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13506; GFX7-NEXT: s_mov_b32 s4, s8 13507; GFX7-NEXT: s_mov_b32 s5, s9 13508; GFX7-NEXT: s_mov_b32 s9, s10 13509; GFX7-NEXT: s_mov_b32 s8, s11 13510; GFX7-NEXT: s_add_u32 s4, s4, s9 13511; GFX7-NEXT: s_addc_u32 s8, s5, s8 13512; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 13513; GFX7-NEXT: s_mov_b32 s5, s8 13514; GFX7-NEXT: v_mov_b32_e32 v2, s7 13515; GFX7-NEXT: v_mov_b32_e32 v0, s6 13516; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13517; GFX7-NEXT: v_mov_b32_e32 v3, v0 13518; GFX7-NEXT: v_mov_b32_e32 v0, s4 13519; GFX7-NEXT: v_mov_b32_e32 v1, s5 13520; GFX7-NEXT: s_waitcnt vmcnt(0) 13521; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 13522; GFX7-NEXT: s_endpgm 13523; 13524; GFX10-WGP-LABEL: global_system_one_as_release_monotonic_cmpxchg: 13525; GFX10-WGP: ; %bb.0: ; %entry 13526; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 13527; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13528; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 13529; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 13530; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13531; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 13532; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 13533; GFX10-WGP-NEXT: ; kill: 
def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13534; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 13535; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13536; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13537; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13538; GFX10-WGP-NEXT: s_endpgm 13539; 13540; GFX10-CU-LABEL: global_system_one_as_release_monotonic_cmpxchg: 13541; GFX10-CU: ; %bb.0: ; %entry 13542; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 13543; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13544; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 13545; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 13546; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13547; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 13548; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 13549; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13550; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 13551; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 13552; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13553; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13554; GFX10-CU-NEXT: s_endpgm 13555; 13556; SKIP-CACHE-INV-LABEL: global_system_one_as_release_monotonic_cmpxchg: 13557; SKIP-CACHE-INV: ; %bb.0: ; %entry 13558; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 13559; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 13560; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 13561; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 13562; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13563; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 13564; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 13565; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 13566; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 13567; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 13568; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 13569; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 13570; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 13571; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 13572; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 
v2, s4 13573; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13574; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 13575; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13576; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 13577; SKIP-CACHE-INV-NEXT: s_endpgm 13578; 13579; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg: 13580; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13581; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13582; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13583; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13584; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13585; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13586; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13587; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13588; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13589; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13590; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 13591; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13592; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13593; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13594; 13595; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg: 13596; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13597; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13598; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13599; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13600; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13601; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13602; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13603; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13604; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13605; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13606; GFX90A-TGSPLIT-NEXT: buffer_wbl2 13607; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13608; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] 
offset:16 13609; GFX90A-TGSPLIT-NEXT: s_endpgm 13610; 13611; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg: 13612; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13613; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13614; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13615; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13616; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13617; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13618; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13619; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13620; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13621; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13622; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 13623; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13624; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 13625; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13626; 13627; GFX940-TGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg: 13628; GFX940-TGSPLIT: ; %bb.0: ; %entry 13629; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13630; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13631; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13632; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13633; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13634; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13635; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13636; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13637; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13638; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 13639; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13640; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 13641; GFX940-TGSPLIT-NEXT: s_endpgm 13642; 13643; GFX11-WGP-LABEL: global_system_one_as_release_monotonic_cmpxchg: 13644; GFX11-WGP: ; %bb.0: ; %entry 13645; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13646; GFX11-WGP-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x0 13647; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13648; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13649; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13650; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 13651; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 13652; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13653; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 13654; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 13655; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13656; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13657; GFX11-WGP-NEXT: s_endpgm 13658; 13659; GFX11-CU-LABEL: global_system_one_as_release_monotonic_cmpxchg: 13660; GFX11-CU: ; %bb.0: ; %entry 13661; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13662; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13663; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13664; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13665; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13666; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 13667; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 13668; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13669; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 13670; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 13671; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13672; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13673; GFX11-CU-NEXT: s_endpgm 13674; 13675; GFX12-WGP-LABEL: global_system_one_as_release_monotonic_cmpxchg: 13676; GFX12-WGP: ; %bb.0: ; %entry 13677; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13678; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13679; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13680; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13681; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13682; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 13683; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 13684; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13685; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 13686; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 13687; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 
13688; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 13689; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 13690; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13691; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 13692; GFX12-WGP-NEXT: s_endpgm 13693; 13694; GFX12-CU-LABEL: global_system_one_as_release_monotonic_cmpxchg: 13695; GFX12-CU: ; %bb.0: ; %entry 13696; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13697; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13698; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13699; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13700; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13701; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 13702; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 13703; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13704; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 13705; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 13706; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 13707; GFX12-CU-NEXT: s_wait_samplecnt 0x0 13708; GFX12-CU-NEXT: s_wait_loadcnt 0x0 13709; GFX12-CU-NEXT: s_wait_storecnt 0x0 13710; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 13711; GFX12-CU-NEXT: s_endpgm 13712 ptr addrspace(1) %out, i32 %in, i32 %old) { 13713entry: 13714 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 13715 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release monotonic 13716 ret void 13717} 13718 13719define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( 13720; GFX6-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 13721; GFX6: ; %bb.0: ; %entry 13722; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 13723; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 13724; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 13725; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 13726; GFX6-NEXT: s_waitcnt lgkmcnt(0) 13727; GFX6-NEXT: s_mov_b32 s12, s5 13728; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13729; GFX6-NEXT: s_mov_b32 s10, 0x100f000 13730; GFX6-NEXT: s_mov_b32 
s11, -1 13731; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13732; GFX6-NEXT: s_mov_b32 s5, s12 13733; GFX6-NEXT: s_mov_b32 s6, s11 13734; GFX6-NEXT: s_mov_b32 s7, s10 13735; GFX6-NEXT: v_mov_b32_e32 v0, s9 13736; GFX6-NEXT: v_mov_b32_e32 v2, s8 13737; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13738; GFX6-NEXT: v_mov_b32_e32 v1, v2 13739; GFX6-NEXT: s_waitcnt vmcnt(0) 13740; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 13741; GFX6-NEXT: s_waitcnt vmcnt(0) 13742; GFX6-NEXT: buffer_wbinvl1 13743; GFX6-NEXT: s_endpgm 13744; 13745; GFX7-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 13746; GFX7: ; %bb.0: ; %entry 13747; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 13748; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 13749; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 13750; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 13751; GFX7-NEXT: s_mov_b64 s[10:11], 16 13752; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13753; GFX7-NEXT: s_mov_b32 s4, s8 13754; GFX7-NEXT: s_mov_b32 s5, s9 13755; GFX7-NEXT: s_mov_b32 s9, s10 13756; GFX7-NEXT: s_mov_b32 s8, s11 13757; GFX7-NEXT: s_add_u32 s4, s4, s9 13758; GFX7-NEXT: s_addc_u32 s8, s5, s8 13759; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 13760; GFX7-NEXT: s_mov_b32 s5, s8 13761; GFX7-NEXT: v_mov_b32_e32 v2, s7 13762; GFX7-NEXT: v_mov_b32_e32 v0, s6 13763; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13764; GFX7-NEXT: v_mov_b32_e32 v3, v0 13765; GFX7-NEXT: v_mov_b32_e32 v0, s4 13766; GFX7-NEXT: v_mov_b32_e32 v1, s5 13767; GFX7-NEXT: s_waitcnt vmcnt(0) 13768; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 13769; GFX7-NEXT: s_waitcnt vmcnt(0) 13770; GFX7-NEXT: buffer_wbinvl1_vol 13771; GFX7-NEXT: s_endpgm 13772; 13773; GFX10-WGP-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 13774; GFX10-WGP: ; %bb.0: ; %entry 13775; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 13776; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13777; 
GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 13778; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 13779; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13780; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 13781; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 13782; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13783; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 13784; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13785; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13786; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13787; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13788; GFX10-WGP-NEXT: buffer_gl1_inv 13789; GFX10-WGP-NEXT: buffer_gl0_inv 13790; GFX10-WGP-NEXT: s_endpgm 13791; 13792; GFX10-CU-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 13793; GFX10-CU: ; %bb.0: ; %entry 13794; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 13795; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13796; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 13797; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 13798; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13799; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 13800; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 13801; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13802; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 13803; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 13804; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13805; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13806; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13807; GFX10-CU-NEXT: buffer_gl1_inv 13808; GFX10-CU-NEXT: buffer_gl0_inv 13809; GFX10-CU-NEXT: s_endpgm 13810; 13811; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 13812; SKIP-CACHE-INV: ; %bb.0: ; %entry 13813; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 13814; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 13815; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 13816; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 13817; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13818; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s8, s1 13819; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 13820; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 13821; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 13822; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 13823; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 13824; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 13825; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 13826; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 13827; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 13828; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13829; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 13830; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13831; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 13832; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13833; SKIP-CACHE-INV-NEXT: s_endpgm 13834; 13835; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 13836; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13837; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13838; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13839; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13840; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13841; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13842; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13843; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13844; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13845; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13846; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 13847; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13848; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13849; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13850; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 13851; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 13852; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13853; 13854; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 13855; 
GFX90A-TGSPLIT: ; %bb.0: ; %entry 13856; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13857; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13858; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13859; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13860; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13861; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13862; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13863; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13864; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13865; GFX90A-TGSPLIT-NEXT: buffer_wbl2 13866; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13867; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13868; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13869; GFX90A-TGSPLIT-NEXT: buffer_invl2 13870; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 13871; GFX90A-TGSPLIT-NEXT: s_endpgm 13872; 13873; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 13874; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13875; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13876; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13877; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13878; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13879; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13880; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13881; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13882; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13883; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13884; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 13885; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13886; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 13887; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13888; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 13889; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13890; 13891; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 13892; GFX940-TGSPLIT: ; %bb.0: ; 
%entry 13893; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13894; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13895; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13896; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13897; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13898; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13899; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13900; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13901; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13902; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 13903; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13904; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 13905; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13906; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 13907; GFX940-TGSPLIT-NEXT: s_endpgm 13908; 13909; GFX11-WGP-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 13910; GFX11-WGP: ; %bb.0: ; %entry 13911; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13912; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13913; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13914; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13915; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13916; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 13917; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 13918; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13919; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 13920; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 13921; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13922; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13923; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13924; GFX11-WGP-NEXT: buffer_gl1_inv 13925; GFX11-WGP-NEXT: buffer_gl0_inv 13926; GFX11-WGP-NEXT: s_endpgm 13927; 13928; GFX11-CU-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 13929; GFX11-CU: ; %bb.0: ; %entry 13930; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13931; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13932; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13933; 
GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13934; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13935; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 13936; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 13937; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13938; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 13939; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 13940; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13941; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13942; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13943; GFX11-CU-NEXT: buffer_gl1_inv 13944; GFX11-CU-NEXT: buffer_gl0_inv 13945; GFX11-CU-NEXT: s_endpgm 13946; 13947; GFX12-WGP-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 13948; GFX12-WGP: ; %bb.0: ; %entry 13949; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13950; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13951; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13952; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13953; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13954; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 13955; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 13956; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13957; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 13958; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 13959; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 13960; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 13961; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 13962; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13963; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 13964; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13965; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 13966; GFX12-WGP-NEXT: s_endpgm 13967; 13968; GFX12-CU-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 13969; GFX12-CU: ; %bb.0: ; %entry 13970; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13971; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13972; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13973; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13974; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13975; GFX12-CU-NEXT: 
v_mov_b32_e32 v1, s3 13976; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 13977; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13978; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 13979; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 13980; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 13981; GFX12-CU-NEXT: s_wait_samplecnt 0x0 13982; GFX12-CU-NEXT: s_wait_loadcnt 0x0 13983; GFX12-CU-NEXT: s_wait_storecnt 0x0 13984; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 13985; GFX12-CU-NEXT: s_wait_storecnt 0x0 13986; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 13987; GFX12-CU-NEXT: s_endpgm 13988 ptr addrspace(1) %out, i32 %in, i32 %old) { 13989entry: 13990 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 13991 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic 13992 ret void 13993} 13994 13995define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( 13996; GFX6-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 13997; GFX6: ; %bb.0: ; %entry 13998; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 13999; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 14000; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 14001; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 14002; GFX6-NEXT: s_waitcnt lgkmcnt(0) 14003; GFX6-NEXT: s_mov_b32 s12, s5 14004; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 14005; GFX6-NEXT: s_mov_b32 s10, 0x100f000 14006; GFX6-NEXT: s_mov_b32 s11, -1 14007; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 14008; GFX6-NEXT: s_mov_b32 s5, s12 14009; GFX6-NEXT: s_mov_b32 s6, s11 14010; GFX6-NEXT: s_mov_b32 s7, s10 14011; GFX6-NEXT: v_mov_b32_e32 v0, s9 14012; GFX6-NEXT: v_mov_b32_e32 v2, s8 14013; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14014; GFX6-NEXT: v_mov_b32_e32 v1, v2 14015; GFX6-NEXT: s_waitcnt vmcnt(0) 14016; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 14017; GFX6-NEXT: s_waitcnt 
vmcnt(0) 14018; GFX6-NEXT: buffer_wbinvl1 14019; GFX6-NEXT: s_endpgm 14020; 14021; GFX7-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 14022; GFX7: ; %bb.0: ; %entry 14023; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14024; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14025; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14026; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14027; GFX7-NEXT: s_mov_b64 s[10:11], 16 14028; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14029; GFX7-NEXT: s_mov_b32 s4, s8 14030; GFX7-NEXT: s_mov_b32 s5, s9 14031; GFX7-NEXT: s_mov_b32 s9, s10 14032; GFX7-NEXT: s_mov_b32 s8, s11 14033; GFX7-NEXT: s_add_u32 s4, s4, s9 14034; GFX7-NEXT: s_addc_u32 s8, s5, s8 14035; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14036; GFX7-NEXT: s_mov_b32 s5, s8 14037; GFX7-NEXT: v_mov_b32_e32 v2, s7 14038; GFX7-NEXT: v_mov_b32_e32 v0, s6 14039; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14040; GFX7-NEXT: v_mov_b32_e32 v3, v0 14041; GFX7-NEXT: v_mov_b32_e32 v0, s4 14042; GFX7-NEXT: v_mov_b32_e32 v1, s5 14043; GFX7-NEXT: s_waitcnt vmcnt(0) 14044; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14045; GFX7-NEXT: s_waitcnt vmcnt(0) 14046; GFX7-NEXT: buffer_wbinvl1_vol 14047; GFX7-NEXT: s_endpgm 14048; 14049; GFX10-WGP-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 14050; GFX10-WGP: ; %bb.0: ; %entry 14051; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 14052; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14053; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 14054; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 14055; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14056; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 14057; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 14058; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14059; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 14060; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 14061; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14062; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14063; 
GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14064; GFX10-WGP-NEXT: buffer_gl1_inv 14065; GFX10-WGP-NEXT: buffer_gl0_inv 14066; GFX10-WGP-NEXT: s_endpgm 14067; 14068; GFX10-CU-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 14069; GFX10-CU: ; %bb.0: ; %entry 14070; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 14071; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14072; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 14073; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 14074; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14075; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 14076; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 14077; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14078; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 14079; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 14080; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14081; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14082; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14083; GFX10-CU-NEXT: buffer_gl1_inv 14084; GFX10-CU-NEXT: buffer_gl0_inv 14085; GFX10-CU-NEXT: s_endpgm 14086; 14087; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 14088; SKIP-CACHE-INV: ; %bb.0: ; %entry 14089; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 14090; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 14091; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 14092; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 14093; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14094; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 14095; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14096; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 14097; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 14098; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14099; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 14100; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 14101; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 14102; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 14103; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 14104; 
SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14105; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 14106; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14107; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 14108; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14109; SKIP-CACHE-INV-NEXT: s_endpgm 14110; 14111; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 14112; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14113; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14114; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14115; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14116; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14117; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14118; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14119; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14120; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14121; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14122; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 14123; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14124; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14125; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14126; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 14127; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 14128; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14129; 14130; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 14131; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14132; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14133; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14134; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14135; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14136; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14137; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14138; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14139; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14140; GFX90A-TGSPLIT-NEXT: 
v_mov_b32_e32 v3, v1 14141; GFX90A-TGSPLIT-NEXT: buffer_wbl2 14142; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14143; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14144; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14145; GFX90A-TGSPLIT-NEXT: buffer_invl2 14146; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 14147; GFX90A-TGSPLIT-NEXT: s_endpgm 14148; 14149; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 14150; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14151; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14152; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14153; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14154; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14155; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14156; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14157; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14158; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14159; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14160; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 14161; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14162; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 14163; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14164; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 14165; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14166; 14167; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 14168; GFX940-TGSPLIT: ; %bb.0: ; %entry 14169; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14170; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14171; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14172; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14173; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14174; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14175; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14176; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14177; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14178; 
GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 14179; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14180; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 14181; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14182; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 14183; GFX940-TGSPLIT-NEXT: s_endpgm 14184; 14185; GFX11-WGP-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 14186; GFX11-WGP: ; %bb.0: ; %entry 14187; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 14188; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14189; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14190; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14191; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14192; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 14193; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 14194; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14195; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 14196; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 14197; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14198; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14199; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14200; GFX11-WGP-NEXT: buffer_gl1_inv 14201; GFX11-WGP-NEXT: buffer_gl0_inv 14202; GFX11-WGP-NEXT: s_endpgm 14203; 14204; GFX11-CU-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 14205; GFX11-CU: ; %bb.0: ; %entry 14206; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 14207; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14208; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14209; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14210; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14211; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 14212; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 14213; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14214; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 14215; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 14216; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 14217; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14218; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 14219; 
GFX11-CU-NEXT: buffer_gl1_inv 14220; GFX11-CU-NEXT: buffer_gl0_inv 14221; GFX11-CU-NEXT: s_endpgm 14222; 14223; GFX12-WGP-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 14224; GFX12-WGP: ; %bb.0: ; %entry 14225; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 14226; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14227; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14228; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14229; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14230; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 14231; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 14232; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14233; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 14234; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 14235; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 14236; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 14237; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 14238; GFX12-WGP-NEXT: s_wait_storecnt 0x0 14239; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 14240; GFX12-WGP-NEXT: s_wait_storecnt 0x0 14241; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 14242; GFX12-WGP-NEXT: s_endpgm 14243; 14244; GFX12-CU-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 14245; GFX12-CU: ; %bb.0: ; %entry 14246; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 14247; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14248; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14249; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14250; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14251; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 14252; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 14253; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14254; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 14255; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 14256; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 14257; GFX12-CU-NEXT: s_wait_samplecnt 0x0 14258; GFX12-CU-NEXT: s_wait_loadcnt 0x0 14259; GFX12-CU-NEXT: s_wait_storecnt 0x0 14260; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 14261; GFX12-CU-NEXT: 
s_wait_storecnt 0x0 14262; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 14263; GFX12-CU-NEXT: s_endpgm 14264 ptr addrspace(1) %out, i32 %in, i32 %old) { 14265entry: 14266 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 14267 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic 14268 ret void 14269} 14270 14271define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg( 14272; GFX6-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: 14273; GFX6: ; %bb.0: ; %entry 14274; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 14275; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 14276; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 14277; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 14278; GFX6-NEXT: s_waitcnt lgkmcnt(0) 14279; GFX6-NEXT: s_mov_b32 s12, s5 14280; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 14281; GFX6-NEXT: s_mov_b32 s10, 0x100f000 14282; GFX6-NEXT: s_mov_b32 s11, -1 14283; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 14284; GFX6-NEXT: s_mov_b32 s5, s12 14285; GFX6-NEXT: s_mov_b32 s6, s11 14286; GFX6-NEXT: s_mov_b32 s7, s10 14287; GFX6-NEXT: v_mov_b32_e32 v0, s9 14288; GFX6-NEXT: v_mov_b32_e32 v2, s8 14289; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14290; GFX6-NEXT: v_mov_b32_e32 v1, v2 14291; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 14292; GFX6-NEXT: s_waitcnt vmcnt(0) 14293; GFX6-NEXT: buffer_wbinvl1 14294; GFX6-NEXT: s_endpgm 14295; 14296; GFX7-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: 14297; GFX7: ; %bb.0: ; %entry 14298; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14299; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14300; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14301; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14302; GFX7-NEXT: s_mov_b64 s[10:11], 16 14303; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14304; GFX7-NEXT: s_mov_b32 s4, s8 14305; GFX7-NEXT: s_mov_b32 s5, s9 14306; GFX7-NEXT: s_mov_b32 s9, s10 14307; 
GFX7-NEXT: s_mov_b32 s8, s11 14308; GFX7-NEXT: s_add_u32 s4, s4, s9 14309; GFX7-NEXT: s_addc_u32 s8, s5, s8 14310; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14311; GFX7-NEXT: s_mov_b32 s5, s8 14312; GFX7-NEXT: v_mov_b32_e32 v2, s7 14313; GFX7-NEXT: v_mov_b32_e32 v0, s6 14314; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14315; GFX7-NEXT: v_mov_b32_e32 v3, v0 14316; GFX7-NEXT: v_mov_b32_e32 v0, s4 14317; GFX7-NEXT: v_mov_b32_e32 v1, s5 14318; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14319; GFX7-NEXT: s_waitcnt vmcnt(0) 14320; GFX7-NEXT: buffer_wbinvl1_vol 14321; GFX7-NEXT: s_endpgm 14322; 14323; GFX10-WGP-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: 14324; GFX10-WGP: ; %bb.0: ; %entry 14325; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 14326; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14327; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 14328; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 14329; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14330; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 14331; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 14332; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14333; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 14334; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14335; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14336; GFX10-WGP-NEXT: buffer_gl1_inv 14337; GFX10-WGP-NEXT: buffer_gl0_inv 14338; GFX10-WGP-NEXT: s_endpgm 14339; 14340; GFX10-CU-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: 14341; GFX10-CU: ; %bb.0: ; %entry 14342; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 14343; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14344; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 14345; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 14346; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14347; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 14348; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 14349; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14350; 
GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 14351; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14352; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14353; GFX10-CU-NEXT: buffer_gl1_inv 14354; GFX10-CU-NEXT: buffer_gl0_inv 14355; GFX10-CU-NEXT: s_endpgm 14356; 14357; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: 14358; SKIP-CACHE-INV: ; %bb.0: ; %entry 14359; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 14360; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 14361; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 14362; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 14363; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14364; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 14365; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14366; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 14367; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 14368; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14369; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 14370; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 14371; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 14372; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 14373; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 14374; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14375; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 14376; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 14377; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14378; SKIP-CACHE-INV-NEXT: s_endpgm 14379; 14380; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: 14381; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14382; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14383; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14384; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14385; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14386; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14387; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14388; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14389; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14390; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14391; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14392; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14393; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 14394; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 14395; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14396; 14397; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: 14398; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14399; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14400; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14401; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14402; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14403; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14404; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14405; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14406; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14407; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14408; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14409; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14410; GFX90A-TGSPLIT-NEXT: buffer_invl2 14411; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 14412; GFX90A-TGSPLIT-NEXT: s_endpgm 14413; 14414; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: 14415; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14416; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14417; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14418; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14419; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14420; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14421; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14422; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14423; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14424; GFX940-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v3, v1 14425; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 14426; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14427; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 14428; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14429; 14430; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: 14431; GFX940-TGSPLIT: ; %bb.0: ; %entry 14432; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14433; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14434; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14435; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14436; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14437; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14438; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14439; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14440; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14441; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 14442; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14443; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 14444; GFX940-TGSPLIT-NEXT: s_endpgm 14445; 14446; GFX11-WGP-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: 14447; GFX11-WGP: ; %bb.0: ; %entry 14448; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 14449; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14450; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14451; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14452; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14453; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 14454; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 14455; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14456; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 14457; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14458; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14459; GFX11-WGP-NEXT: buffer_gl1_inv 14460; GFX11-WGP-NEXT: buffer_gl0_inv 14461; GFX11-WGP-NEXT: s_endpgm 14462; 14463; GFX11-CU-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: 14464; 
GFX11-CU: ; %bb.0: ; %entry 14465; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 14466; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14467; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14468; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14469; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14470; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 14471; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 14472; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14473; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 14474; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14475; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 14476; GFX11-CU-NEXT: buffer_gl1_inv 14477; GFX11-CU-NEXT: buffer_gl0_inv 14478; GFX11-CU-NEXT: s_endpgm 14479; 14480; GFX12-WGP-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: 14481; GFX12-WGP: ; %bb.0: ; %entry 14482; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 14483; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14484; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14485; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14486; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14487; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 14488; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 14489; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14490; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 14491; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 14492; GFX12-WGP-NEXT: s_wait_storecnt 0x0 14493; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 14494; GFX12-WGP-NEXT: s_endpgm 14495; 14496; GFX12-CU-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: 14497; GFX12-CU: ; %bb.0: ; %entry 14498; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 14499; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14500; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14501; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14502; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14503; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 14504; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 14505; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 
killed $exec 14506; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 14507; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 14508; GFX12-CU-NEXT: s_wait_storecnt 0x0 14509; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 14510; GFX12-CU-NEXT: s_endpgm 14511 ptr addrspace(1) %out, i32 %in, i32 %old) { 14512entry: 14513 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 14514 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire 14515 ret void 14516} 14517 14518define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg( 14519; GFX6-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 14520; GFX6: ; %bb.0: ; %entry 14521; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 14522; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 14523; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 14524; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 14525; GFX6-NEXT: s_waitcnt lgkmcnt(0) 14526; GFX6-NEXT: s_mov_b32 s12, s5 14527; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 14528; GFX6-NEXT: s_mov_b32 s10, 0x100f000 14529; GFX6-NEXT: s_mov_b32 s11, -1 14530; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 14531; GFX6-NEXT: s_mov_b32 s5, s12 14532; GFX6-NEXT: s_mov_b32 s6, s11 14533; GFX6-NEXT: s_mov_b32 s7, s10 14534; GFX6-NEXT: v_mov_b32_e32 v0, s9 14535; GFX6-NEXT: v_mov_b32_e32 v2, s8 14536; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14537; GFX6-NEXT: v_mov_b32_e32 v1, v2 14538; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 14539; GFX6-NEXT: s_waitcnt vmcnt(0) 14540; GFX6-NEXT: buffer_wbinvl1 14541; GFX6-NEXT: s_endpgm 14542; 14543; GFX7-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 14544; GFX7: ; %bb.0: ; %entry 14545; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14546; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14547; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14548; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14549; GFX7-NEXT: s_mov_b64 
s[10:11], 16 14550; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14551; GFX7-NEXT: s_mov_b32 s4, s8 14552; GFX7-NEXT: s_mov_b32 s5, s9 14553; GFX7-NEXT: s_mov_b32 s9, s10 14554; GFX7-NEXT: s_mov_b32 s8, s11 14555; GFX7-NEXT: s_add_u32 s4, s4, s9 14556; GFX7-NEXT: s_addc_u32 s8, s5, s8 14557; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14558; GFX7-NEXT: s_mov_b32 s5, s8 14559; GFX7-NEXT: v_mov_b32_e32 v2, s7 14560; GFX7-NEXT: v_mov_b32_e32 v0, s6 14561; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14562; GFX7-NEXT: v_mov_b32_e32 v3, v0 14563; GFX7-NEXT: v_mov_b32_e32 v0, s4 14564; GFX7-NEXT: v_mov_b32_e32 v1, s5 14565; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14566; GFX7-NEXT: s_waitcnt vmcnt(0) 14567; GFX7-NEXT: buffer_wbinvl1_vol 14568; GFX7-NEXT: s_endpgm 14569; 14570; GFX10-WGP-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 14571; GFX10-WGP: ; %bb.0: ; %entry 14572; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 14573; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14574; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 14575; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 14576; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14577; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 14578; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 14579; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14580; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 14581; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14582; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14583; GFX10-WGP-NEXT: buffer_gl1_inv 14584; GFX10-WGP-NEXT: buffer_gl0_inv 14585; GFX10-WGP-NEXT: s_endpgm 14586; 14587; GFX10-CU-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 14588; GFX10-CU: ; %bb.0: ; %entry 14589; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 14590; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14591; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 14592; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 14593; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14594; GFX10-CU-NEXT: 
v_mov_b32_e32 v1, s7 14595; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 14596; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14597; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 14598; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14599; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14600; GFX10-CU-NEXT: buffer_gl1_inv 14601; GFX10-CU-NEXT: buffer_gl0_inv 14602; GFX10-CU-NEXT: s_endpgm 14603; 14604; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 14605; SKIP-CACHE-INV: ; %bb.0: ; %entry 14606; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 14607; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 14608; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 14609; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 14610; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14611; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 14612; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14613; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 14614; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 14615; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14616; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 14617; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 14618; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 14619; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 14620; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 14621; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14622; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 14623; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 14624; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14625; SKIP-CACHE-INV-NEXT: s_endpgm 14626; 14627; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 14628; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14629; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14630; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14631; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14632; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14633; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14634; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14635; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14636; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14637; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14638; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14639; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14640; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 14641; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 14642; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14643; 14644; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 14645; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14646; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14647; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14648; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14649; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14650; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14651; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14652; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14653; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14654; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14655; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14656; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14657; GFX90A-TGSPLIT-NEXT: buffer_invl2 14658; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 14659; GFX90A-TGSPLIT-NEXT: s_endpgm 14660; 14661; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 14662; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14663; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14664; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14665; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14666; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14667; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14668; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14669; 
GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14670; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14671; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14672; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 14673; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14674; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 14675; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14676; 14677; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 14678; GFX940-TGSPLIT: ; %bb.0: ; %entry 14679; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14680; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14681; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14682; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14683; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14684; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14685; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14686; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14687; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14688; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 14689; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14690; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 14691; GFX940-TGSPLIT-NEXT: s_endpgm 14692; 14693; GFX11-WGP-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 14694; GFX11-WGP: ; %bb.0: ; %entry 14695; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 14696; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14697; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14698; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14699; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14700; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 14701; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 14702; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14703; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 14704; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14705; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14706; GFX11-WGP-NEXT: 
buffer_gl1_inv 14707; GFX11-WGP-NEXT: buffer_gl0_inv 14708; GFX11-WGP-NEXT: s_endpgm 14709; 14710; GFX11-CU-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 14711; GFX11-CU: ; %bb.0: ; %entry 14712; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 14713; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14714; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14715; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14716; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14717; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 14718; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 14719; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14720; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 14721; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14722; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 14723; GFX11-CU-NEXT: buffer_gl1_inv 14724; GFX11-CU-NEXT: buffer_gl0_inv 14725; GFX11-CU-NEXT: s_endpgm 14726; 14727; GFX12-WGP-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 14728; GFX12-WGP: ; %bb.0: ; %entry 14729; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 14730; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14731; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14732; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14733; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14734; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 14735; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 14736; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14737; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 14738; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 14739; GFX12-WGP-NEXT: s_wait_storecnt 0x0 14740; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 14741; GFX12-WGP-NEXT: s_endpgm 14742; 14743; GFX12-CU-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 14744; GFX12-CU: ; %bb.0: ; %entry 14745; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 14746; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14747; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14748; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14749; GFX12-CU-NEXT: 
s_wait_kmcnt 0x0 14750; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 14751; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 14752; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14753; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 14754; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 14755; GFX12-CU-NEXT: s_wait_storecnt 0x0 14756; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 14757; GFX12-CU-NEXT: s_endpgm 14758 ptr addrspace(1) %out, i32 %in, i32 %old) { 14759entry: 14760 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 14761 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire 14762 ret void 14763} 14764 14765define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( 14766; GFX6-LABEL: global_system_one_as_release_acquire_cmpxchg: 14767; GFX6: ; %bb.0: ; %entry 14768; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 14769; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 14770; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 14771; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 14772; GFX6-NEXT: s_waitcnt lgkmcnt(0) 14773; GFX6-NEXT: s_mov_b32 s12, s5 14774; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 14775; GFX6-NEXT: s_mov_b32 s10, 0x100f000 14776; GFX6-NEXT: s_mov_b32 s11, -1 14777; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 14778; GFX6-NEXT: s_mov_b32 s5, s12 14779; GFX6-NEXT: s_mov_b32 s6, s11 14780; GFX6-NEXT: s_mov_b32 s7, s10 14781; GFX6-NEXT: v_mov_b32_e32 v0, s9 14782; GFX6-NEXT: v_mov_b32_e32 v2, s8 14783; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14784; GFX6-NEXT: v_mov_b32_e32 v1, v2 14785; GFX6-NEXT: s_waitcnt vmcnt(0) 14786; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 14787; GFX6-NEXT: s_waitcnt vmcnt(0) 14788; GFX6-NEXT: buffer_wbinvl1 14789; GFX6-NEXT: s_endpgm 14790; 14791; GFX7-LABEL: global_system_one_as_release_acquire_cmpxchg: 14792; GFX7: ; %bb.0: ; %entry 14793; 
GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14794; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14795; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14796; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14797; GFX7-NEXT: s_mov_b64 s[10:11], 16 14798; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14799; GFX7-NEXT: s_mov_b32 s4, s8 14800; GFX7-NEXT: s_mov_b32 s5, s9 14801; GFX7-NEXT: s_mov_b32 s9, s10 14802; GFX7-NEXT: s_mov_b32 s8, s11 14803; GFX7-NEXT: s_add_u32 s4, s4, s9 14804; GFX7-NEXT: s_addc_u32 s8, s5, s8 14805; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14806; GFX7-NEXT: s_mov_b32 s5, s8 14807; GFX7-NEXT: v_mov_b32_e32 v2, s7 14808; GFX7-NEXT: v_mov_b32_e32 v0, s6 14809; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14810; GFX7-NEXT: v_mov_b32_e32 v3, v0 14811; GFX7-NEXT: v_mov_b32_e32 v0, s4 14812; GFX7-NEXT: v_mov_b32_e32 v1, s5 14813; GFX7-NEXT: s_waitcnt vmcnt(0) 14814; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14815; GFX7-NEXT: s_waitcnt vmcnt(0) 14816; GFX7-NEXT: buffer_wbinvl1_vol 14817; GFX7-NEXT: s_endpgm 14818; 14819; GFX10-WGP-LABEL: global_system_one_as_release_acquire_cmpxchg: 14820; GFX10-WGP: ; %bb.0: ; %entry 14821; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 14822; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14823; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 14824; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 14825; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14826; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 14827; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 14828; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14829; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 14830; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 14831; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14832; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14833; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14834; GFX10-WGP-NEXT: buffer_gl1_inv 14835; GFX10-WGP-NEXT: buffer_gl0_inv 14836; GFX10-WGP-NEXT: s_endpgm 14837; 14838; GFX10-CU-LABEL: 
global_system_one_as_release_acquire_cmpxchg: 14839; GFX10-CU: ; %bb.0: ; %entry 14840; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 14841; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14842; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 14843; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 14844; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14845; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 14846; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 14847; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14848; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 14849; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 14850; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14851; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14852; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14853; GFX10-CU-NEXT: buffer_gl1_inv 14854; GFX10-CU-NEXT: buffer_gl0_inv 14855; GFX10-CU-NEXT: s_endpgm 14856; 14857; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_cmpxchg: 14858; SKIP-CACHE-INV: ; %bb.0: ; %entry 14859; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 14860; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 14861; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 14862; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 14863; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14864; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 14865; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14866; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 14867; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 14868; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14869; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 14870; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 14871; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 14872; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 14873; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 14874; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14875; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 14876; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14877; 
SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 14878; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14879; SKIP-CACHE-INV-NEXT: s_endpgm 14880; 14881; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg: 14882; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14883; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14884; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14885; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14886; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14887; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14888; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14889; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14890; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14891; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14892; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 14893; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14894; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14895; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14896; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 14897; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 14898; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14899; 14900; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg: 14901; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14902; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14903; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14904; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14905; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14906; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14907; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14908; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14909; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14910; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14911; GFX90A-TGSPLIT-NEXT: buffer_wbl2 14912; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14913; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14914; 
GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14915; GFX90A-TGSPLIT-NEXT: buffer_invl2 14916; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 14917; GFX90A-TGSPLIT-NEXT: s_endpgm 14918; 14919; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg: 14920; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14921; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14922; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14923; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14924; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14925; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14926; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14927; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14928; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14929; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14930; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 14931; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14932; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 14933; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14934; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 14935; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14936; 14937; GFX940-TGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg: 14938; GFX940-TGSPLIT: ; %bb.0: ; %entry 14939; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14940; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14941; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14942; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14943; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14944; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14945; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14946; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14947; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14948; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 14949; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14950; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 14951; 
GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14952; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 14953; GFX940-TGSPLIT-NEXT: s_endpgm 14954; 14955; GFX11-WGP-LABEL: global_system_one_as_release_acquire_cmpxchg: 14956; GFX11-WGP: ; %bb.0: ; %entry 14957; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 14958; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14959; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14960; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14961; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14962; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 14963; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 14964; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14965; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 14966; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 14967; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14968; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14969; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14970; GFX11-WGP-NEXT: buffer_gl1_inv 14971; GFX11-WGP-NEXT: buffer_gl0_inv 14972; GFX11-WGP-NEXT: s_endpgm 14973; 14974; GFX11-CU-LABEL: global_system_one_as_release_acquire_cmpxchg: 14975; GFX11-CU: ; %bb.0: ; %entry 14976; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 14977; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14978; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14979; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14980; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14981; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 14982; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 14983; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14984; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 14985; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 14986; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 14987; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14988; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 14989; GFX11-CU-NEXT: buffer_gl1_inv 14990; GFX11-CU-NEXT: buffer_gl0_inv 14991; GFX11-CU-NEXT: s_endpgm 14992; 14993; GFX12-WGP-LABEL: global_system_one_as_release_acquire_cmpxchg: 14994; GFX12-WGP: ; 
%bb.0: ; %entry 14995; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 14996; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14997; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14998; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14999; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15000; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 15001; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 15002; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15003; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 15004; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 15005; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 15006; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 15007; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 15008; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15009; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 15010; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15011; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 15012; GFX12-WGP-NEXT: s_endpgm 15013; 15014; GFX12-CU-LABEL: global_system_one_as_release_acquire_cmpxchg: 15015; GFX12-CU: ; %bb.0: ; %entry 15016; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 15017; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15018; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15019; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15020; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15021; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 15022; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 15023; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15024; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 15025; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 15026; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 15027; GFX12-CU-NEXT: s_wait_samplecnt 0x0 15028; GFX12-CU-NEXT: s_wait_loadcnt 0x0 15029; GFX12-CU-NEXT: s_wait_storecnt 0x0 15030; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 15031; GFX12-CU-NEXT: s_wait_storecnt 0x0 15032; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 15033; GFX12-CU-NEXT: s_endpgm 15034 ptr addrspace(1) %out, i32 %in, i32 %old) { 15035entry: 15036 %gep = getelementptr i32, ptr 
addrspace(1) %out, i32 4 15037 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release acquire 15038 ret void 15039} 15040 15041define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( 15042; GFX6-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 15043; GFX6: ; %bb.0: ; %entry 15044; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 15045; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15046; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 15047; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 15048; GFX6-NEXT: s_waitcnt lgkmcnt(0) 15049; GFX6-NEXT: s_mov_b32 s12, s5 15050; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 15051; GFX6-NEXT: s_mov_b32 s10, 0x100f000 15052; GFX6-NEXT: s_mov_b32 s11, -1 15053; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 15054; GFX6-NEXT: s_mov_b32 s5, s12 15055; GFX6-NEXT: s_mov_b32 s6, s11 15056; GFX6-NEXT: s_mov_b32 s7, s10 15057; GFX6-NEXT: v_mov_b32_e32 v0, s9 15058; GFX6-NEXT: v_mov_b32_e32 v2, s8 15059; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15060; GFX6-NEXT: v_mov_b32_e32 v1, v2 15061; GFX6-NEXT: s_waitcnt vmcnt(0) 15062; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 15063; GFX6-NEXT: s_waitcnt vmcnt(0) 15064; GFX6-NEXT: buffer_wbinvl1 15065; GFX6-NEXT: s_endpgm 15066; 15067; GFX7-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 15068; GFX7: ; %bb.0: ; %entry 15069; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15070; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15071; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15072; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15073; GFX7-NEXT: s_mov_b64 s[10:11], 16 15074; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15075; GFX7-NEXT: s_mov_b32 s4, s8 15076; GFX7-NEXT: s_mov_b32 s5, s9 15077; GFX7-NEXT: s_mov_b32 s9, s10 15078; GFX7-NEXT: s_mov_b32 s8, s11 15079; GFX7-NEXT: s_add_u32 s4, s4, s9 15080; GFX7-NEXT: s_addc_u32 s8, s5, s8 15081; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def 
$sgpr4_sgpr5 15082; GFX7-NEXT: s_mov_b32 s5, s8 15083; GFX7-NEXT: v_mov_b32_e32 v2, s7 15084; GFX7-NEXT: v_mov_b32_e32 v0, s6 15085; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15086; GFX7-NEXT: v_mov_b32_e32 v3, v0 15087; GFX7-NEXT: v_mov_b32_e32 v0, s4 15088; GFX7-NEXT: v_mov_b32_e32 v1, s5 15089; GFX7-NEXT: s_waitcnt vmcnt(0) 15090; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15091; GFX7-NEXT: s_waitcnt vmcnt(0) 15092; GFX7-NEXT: buffer_wbinvl1_vol 15093; GFX7-NEXT: s_endpgm 15094; 15095; GFX10-WGP-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 15096; GFX10-WGP: ; %bb.0: ; %entry 15097; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 15098; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15099; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 15100; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 15101; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15102; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 15103; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 15104; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15105; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 15106; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 15107; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15108; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15109; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15110; GFX10-WGP-NEXT: buffer_gl1_inv 15111; GFX10-WGP-NEXT: buffer_gl0_inv 15112; GFX10-WGP-NEXT: s_endpgm 15113; 15114; GFX10-CU-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 15115; GFX10-CU: ; %bb.0: ; %entry 15116; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 15117; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15118; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 15119; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 15120; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15121; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 15122; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 15123; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15124; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 15125; 
GFX10-CU-NEXT: s_waitcnt vmcnt(0) 15126; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15127; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15128; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15129; GFX10-CU-NEXT: buffer_gl1_inv 15130; GFX10-CU-NEXT: buffer_gl0_inv 15131; GFX10-CU-NEXT: s_endpgm 15132; 15133; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 15134; SKIP-CACHE-INV: ; %bb.0: ; %entry 15135; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 15136; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 15137; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 15138; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 15139; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15140; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 15141; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 15142; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 15143; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 15144; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 15145; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 15146; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 15147; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 15148; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 15149; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 15150; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15151; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 15152; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15153; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 15154; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15155; SKIP-CACHE-INV-NEXT: s_endpgm 15156; 15157; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 15158; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15159; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15160; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15161; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15162; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15163; 
GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15164; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15165; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15166; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15167; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15168; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 15169; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15170; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15171; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15172; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 15173; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 15174; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 15175; 15176; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 15177; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15178; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15179; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15180; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15181; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15182; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15183; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15184; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15185; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15186; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15187; GFX90A-TGSPLIT-NEXT: buffer_wbl2 15188; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15189; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15190; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15191; GFX90A-TGSPLIT-NEXT: buffer_invl2 15192; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 15193; GFX90A-TGSPLIT-NEXT: s_endpgm 15194; 15195; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 15196; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15197; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15198; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15199; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15200; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15201; 
GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15202; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15203; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15204; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15205; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15206; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 15207; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15208; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 15209; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15210; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 15211; GFX940-NOTTGSPLIT-NEXT: s_endpgm 15212; 15213; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 15214; GFX940-TGSPLIT: ; %bb.0: ; %entry 15215; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15216; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15217; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15218; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15219; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15220; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15221; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15222; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15223; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15224; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 15225; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15226; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 15227; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15228; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 15229; GFX940-TGSPLIT-NEXT: s_endpgm 15230; 15231; GFX11-WGP-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 15232; GFX11-WGP: ; %bb.0: ; %entry 15233; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 15234; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15235; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15236; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15237; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15238; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 15239; GFX11-WGP-NEXT: 
v_mov_b32_e32 v3, s2 15240; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15241; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 15242; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 15243; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15244; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15245; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15246; GFX11-WGP-NEXT: buffer_gl1_inv 15247; GFX11-WGP-NEXT: buffer_gl0_inv 15248; GFX11-WGP-NEXT: s_endpgm 15249; 15250; GFX11-CU-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 15251; GFX11-CU: ; %bb.0: ; %entry 15252; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 15253; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15254; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15255; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15256; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15257; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 15258; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 15259; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15260; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 15261; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 15262; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15263; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15264; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15265; GFX11-CU-NEXT: buffer_gl1_inv 15266; GFX11-CU-NEXT: buffer_gl0_inv 15267; GFX11-CU-NEXT: s_endpgm 15268; 15269; GFX12-WGP-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 15270; GFX12-WGP: ; %bb.0: ; %entry 15271; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 15272; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15273; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15274; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15275; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15276; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 15277; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 15278; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15279; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 15280; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 15281; GFX12-WGP-NEXT: 
s_wait_bvhcnt 0x0 15282; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 15283; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 15284; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15285; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 15286; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15287; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 15288; GFX12-WGP-NEXT: s_endpgm 15289; 15290; GFX12-CU-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 15291; GFX12-CU: ; %bb.0: ; %entry 15292; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 15293; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15294; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15295; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15296; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15297; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 15298; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 15299; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15300; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 15301; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 15302; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 15303; GFX12-CU-NEXT: s_wait_samplecnt 0x0 15304; GFX12-CU-NEXT: s_wait_loadcnt 0x0 15305; GFX12-CU-NEXT: s_wait_storecnt 0x0 15306; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 15307; GFX12-CU-NEXT: s_wait_storecnt 0x0 15308; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 15309; GFX12-CU-NEXT: s_endpgm 15310 ptr addrspace(1) %out, i32 %in, i32 %old) { 15311entry: 15312 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 15313 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire 15314 ret void 15315} 15316 15317define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( 15318; GFX6-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 15319; GFX6: ; %bb.0: ; %entry 15320; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 15321; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15322; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 15323; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 15324; GFX6-NEXT: 
s_waitcnt lgkmcnt(0) 15325; GFX6-NEXT: s_mov_b32 s12, s5 15326; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 15327; GFX6-NEXT: s_mov_b32 s10, 0x100f000 15328; GFX6-NEXT: s_mov_b32 s11, -1 15329; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 15330; GFX6-NEXT: s_mov_b32 s5, s12 15331; GFX6-NEXT: s_mov_b32 s6, s11 15332; GFX6-NEXT: s_mov_b32 s7, s10 15333; GFX6-NEXT: v_mov_b32_e32 v0, s9 15334; GFX6-NEXT: v_mov_b32_e32 v2, s8 15335; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15336; GFX6-NEXT: v_mov_b32_e32 v1, v2 15337; GFX6-NEXT: s_waitcnt vmcnt(0) 15338; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 15339; GFX6-NEXT: s_waitcnt vmcnt(0) 15340; GFX6-NEXT: buffer_wbinvl1 15341; GFX6-NEXT: s_endpgm 15342; 15343; GFX7-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 15344; GFX7: ; %bb.0: ; %entry 15345; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15346; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15347; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15348; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15349; GFX7-NEXT: s_mov_b64 s[10:11], 16 15350; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15351; GFX7-NEXT: s_mov_b32 s4, s8 15352; GFX7-NEXT: s_mov_b32 s5, s9 15353; GFX7-NEXT: s_mov_b32 s9, s10 15354; GFX7-NEXT: s_mov_b32 s8, s11 15355; GFX7-NEXT: s_add_u32 s4, s4, s9 15356; GFX7-NEXT: s_addc_u32 s8, s5, s8 15357; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15358; GFX7-NEXT: s_mov_b32 s5, s8 15359; GFX7-NEXT: v_mov_b32_e32 v2, s7 15360; GFX7-NEXT: v_mov_b32_e32 v0, s6 15361; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15362; GFX7-NEXT: v_mov_b32_e32 v3, v0 15363; GFX7-NEXT: v_mov_b32_e32 v0, s4 15364; GFX7-NEXT: v_mov_b32_e32 v1, s5 15365; GFX7-NEXT: s_waitcnt vmcnt(0) 15366; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15367; GFX7-NEXT: s_waitcnt vmcnt(0) 15368; GFX7-NEXT: buffer_wbinvl1_vol 15369; GFX7-NEXT: s_endpgm 15370; 15371; GFX10-WGP-LABEL: 
global_system_one_as_seq_cst_acquire_cmpxchg: 15372; GFX10-WGP: ; %bb.0: ; %entry 15373; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 15374; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15375; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 15376; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 15377; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15378; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 15379; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 15380; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15381; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 15382; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 15383; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15384; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15385; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15386; GFX10-WGP-NEXT: buffer_gl1_inv 15387; GFX10-WGP-NEXT: buffer_gl0_inv 15388; GFX10-WGP-NEXT: s_endpgm 15389; 15390; GFX10-CU-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 15391; GFX10-CU: ; %bb.0: ; %entry 15392; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 15393; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15394; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 15395; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 15396; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15397; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 15398; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 15399; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15400; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 15401; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 15402; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15403; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15404; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15405; GFX10-CU-NEXT: buffer_gl1_inv 15406; GFX10-CU-NEXT: buffer_gl0_inv 15407; GFX10-CU-NEXT: s_endpgm 15408; 15409; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 15410; SKIP-CACHE-INV: ; %bb.0: ; %entry 15411; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 15412; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 
15413; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 15414; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 15415; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15416; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 15417; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 15418; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 15419; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 15420; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 15421; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 15422; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 15423; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 15424; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 15425; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 15426; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15427; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 15428; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15429; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 15430; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15431; SKIP-CACHE-INV-NEXT: s_endpgm 15432; 15433; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 15434; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15435; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15436; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15437; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15438; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15439; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15440; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15441; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15442; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15443; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15444; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 15445; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15446; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15447; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15448; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 
15449; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 15450; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 15451; 15452; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 15453; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15454; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15455; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15456; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15457; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15458; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15459; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15460; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15461; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15462; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15463; GFX90A-TGSPLIT-NEXT: buffer_wbl2 15464; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15465; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15466; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15467; GFX90A-TGSPLIT-NEXT: buffer_invl2 15468; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 15469; GFX90A-TGSPLIT-NEXT: s_endpgm 15470; 15471; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 15472; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15473; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15474; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15475; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15476; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15477; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15478; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15479; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15480; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15481; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15482; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 15483; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15484; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 15485; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15486; GFX940-NOTTGSPLIT-NEXT: 
buffer_inv sc0 sc1 15487; GFX940-NOTTGSPLIT-NEXT: s_endpgm 15488; 15489; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 15490; GFX940-TGSPLIT: ; %bb.0: ; %entry 15491; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15492; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15493; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15494; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15495; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15496; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15497; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15498; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15499; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15500; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 15501; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15502; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 15503; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15504; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 15505; GFX940-TGSPLIT-NEXT: s_endpgm 15506; 15507; GFX11-WGP-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 15508; GFX11-WGP: ; %bb.0: ; %entry 15509; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 15510; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15511; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15512; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15513; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15514; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 15515; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 15516; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15517; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 15518; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 15519; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15520; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15521; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15522; GFX11-WGP-NEXT: buffer_gl1_inv 15523; GFX11-WGP-NEXT: buffer_gl0_inv 15524; GFX11-WGP-NEXT: s_endpgm 15525; 15526; GFX11-CU-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 15527; GFX11-CU: ; 
%bb.0: ; %entry 15528; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 15529; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15530; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15531; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15532; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15533; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 15534; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 15535; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15536; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 15537; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 15538; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15539; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15540; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15541; GFX11-CU-NEXT: buffer_gl1_inv 15542; GFX11-CU-NEXT: buffer_gl0_inv 15543; GFX11-CU-NEXT: s_endpgm 15544; 15545; GFX12-WGP-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 15546; GFX12-WGP: ; %bb.0: ; %entry 15547; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 15548; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15549; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15550; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15551; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15552; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 15553; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 15554; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15555; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 15556; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 15557; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 15558; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 15559; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 15560; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15561; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 15562; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15563; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 15564; GFX12-WGP-NEXT: s_endpgm 15565; 15566; GFX12-CU-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 15567; GFX12-CU: ; %bb.0: ; %entry 15568; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 15569; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 
15570; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15571; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15572; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15573; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 15574; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 15575; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15576; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 15577; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 15578; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 15579; GFX12-CU-NEXT: s_wait_samplecnt 0x0 15580; GFX12-CU-NEXT: s_wait_loadcnt 0x0 15581; GFX12-CU-NEXT: s_wait_storecnt 0x0 15582; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 15583; GFX12-CU-NEXT: s_wait_storecnt 0x0 15584; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 15585; GFX12-CU-NEXT: s_endpgm 15586 ptr addrspace(1) %out, i32 %in, i32 %old) { 15587entry: 15588 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 15589 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire 15590 ret void 15591} 15592 15593define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg( 15594; GFX6-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: 15595; GFX6: ; %bb.0: ; %entry 15596; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 15597; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15598; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 15599; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 15600; GFX6-NEXT: s_waitcnt lgkmcnt(0) 15601; GFX6-NEXT: s_mov_b32 s12, s5 15602; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 15603; GFX6-NEXT: s_mov_b32 s10, 0x100f000 15604; GFX6-NEXT: s_mov_b32 s11, -1 15605; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 15606; GFX6-NEXT: s_mov_b32 s5, s12 15607; GFX6-NEXT: s_mov_b32 s6, s11 15608; GFX6-NEXT: s_mov_b32 s7, s10 15609; GFX6-NEXT: v_mov_b32_e32 v0, s9 15610; GFX6-NEXT: v_mov_b32_e32 v2, s8 15611; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15612; GFX6-NEXT: 
v_mov_b32_e32 v1, v2 15613; GFX6-NEXT: s_waitcnt vmcnt(0) 15614; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 15615; GFX6-NEXT: s_waitcnt vmcnt(0) 15616; GFX6-NEXT: buffer_wbinvl1 15617; GFX6-NEXT: s_endpgm 15618; 15619; GFX7-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: 15620; GFX7: ; %bb.0: ; %entry 15621; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15622; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15623; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15624; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15625; GFX7-NEXT: s_mov_b64 s[10:11], 16 15626; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15627; GFX7-NEXT: s_mov_b32 s4, s8 15628; GFX7-NEXT: s_mov_b32 s5, s9 15629; GFX7-NEXT: s_mov_b32 s9, s10 15630; GFX7-NEXT: s_mov_b32 s8, s11 15631; GFX7-NEXT: s_add_u32 s4, s4, s9 15632; GFX7-NEXT: s_addc_u32 s8, s5, s8 15633; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15634; GFX7-NEXT: s_mov_b32 s5, s8 15635; GFX7-NEXT: v_mov_b32_e32 v2, s7 15636; GFX7-NEXT: v_mov_b32_e32 v0, s6 15637; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15638; GFX7-NEXT: v_mov_b32_e32 v3, v0 15639; GFX7-NEXT: v_mov_b32_e32 v0, s4 15640; GFX7-NEXT: v_mov_b32_e32 v1, s5 15641; GFX7-NEXT: s_waitcnt vmcnt(0) 15642; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15643; GFX7-NEXT: s_waitcnt vmcnt(0) 15644; GFX7-NEXT: buffer_wbinvl1_vol 15645; GFX7-NEXT: s_endpgm 15646; 15647; GFX10-WGP-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: 15648; GFX10-WGP: ; %bb.0: ; %entry 15649; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 15650; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15651; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 15652; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 15653; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15654; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 15655; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 15656; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15657; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 15658; GFX10-WGP-NEXT: 
s_waitcnt vmcnt(0) 15659; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15660; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15661; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15662; GFX10-WGP-NEXT: buffer_gl1_inv 15663; GFX10-WGP-NEXT: buffer_gl0_inv 15664; GFX10-WGP-NEXT: s_endpgm 15665; 15666; GFX10-CU-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: 15667; GFX10-CU: ; %bb.0: ; %entry 15668; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 15669; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15670; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 15671; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 15672; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15673; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 15674; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 15675; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15676; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 15677; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 15678; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15679; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15680; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15681; GFX10-CU-NEXT: buffer_gl1_inv 15682; GFX10-CU-NEXT: buffer_gl0_inv 15683; GFX10-CU-NEXT: s_endpgm 15684; 15685; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: 15686; SKIP-CACHE-INV: ; %bb.0: ; %entry 15687; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 15688; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 15689; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 15690; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 15691; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15692; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 15693; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 15694; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 15695; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 15696; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 15697; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 15698; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 15699; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 15700; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 15701; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 15702; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15703; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 15704; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15705; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 15706; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15707; SKIP-CACHE-INV-NEXT: s_endpgm 15708; 15709; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: 15710; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15711; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15712; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15713; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15714; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15715; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15716; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15717; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15718; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15719; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15720; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 15721; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15722; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15723; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15724; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 15725; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 15726; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 15727; 15728; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: 15729; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15730; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15731; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15732; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15733; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15734; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15735; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15736; GFX90A-TGSPLIT-NEXT: 
v_mov_b32_e32 v1, s6 15737; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15738; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15739; GFX90A-TGSPLIT-NEXT: buffer_wbl2 15740; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15741; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15742; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15743; GFX90A-TGSPLIT-NEXT: buffer_invl2 15744; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 15745; GFX90A-TGSPLIT-NEXT: s_endpgm 15746; 15747; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: 15748; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15749; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15750; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15751; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15752; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15753; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15754; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15755; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15756; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15757; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15758; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 15759; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15760; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 15761; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15762; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 15763; GFX940-NOTTGSPLIT-NEXT: s_endpgm 15764; 15765; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: 15766; GFX940-TGSPLIT: ; %bb.0: ; %entry 15767; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15768; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15769; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15770; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15771; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15772; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15773; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15774; 
GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15775; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15776; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 15777; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15778; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 15779; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15780; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 15781; GFX940-TGSPLIT-NEXT: s_endpgm 15782; 15783; GFX11-WGP-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: 15784; GFX11-WGP: ; %bb.0: ; %entry 15785; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 15786; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15787; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15788; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15789; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15790; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 15791; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 15792; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15793; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 15794; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 15795; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15796; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15797; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15798; GFX11-WGP-NEXT: buffer_gl1_inv 15799; GFX11-WGP-NEXT: buffer_gl0_inv 15800; GFX11-WGP-NEXT: s_endpgm 15801; 15802; GFX11-CU-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: 15803; GFX11-CU: ; %bb.0: ; %entry 15804; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 15805; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15806; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15807; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15808; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15809; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 15810; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 15811; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15812; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 15813; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 15814; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 
15815; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15816; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15817; GFX11-CU-NEXT: buffer_gl1_inv 15818; GFX11-CU-NEXT: buffer_gl0_inv 15819; GFX11-CU-NEXT: s_endpgm 15820; 15821; GFX12-WGP-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: 15822; GFX12-WGP: ; %bb.0: ; %entry 15823; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 15824; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15825; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15826; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15827; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15828; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 15829; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 15830; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15831; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 15832; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 15833; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 15834; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 15835; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 15836; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15837; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 15838; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15839; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 15840; GFX12-WGP-NEXT: s_endpgm 15841; 15842; GFX12-CU-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: 15843; GFX12-CU: ; %bb.0: ; %entry 15844; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 15845; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15846; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15847; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15848; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15849; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 15850; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 15851; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15852; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 15853; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 15854; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 15855; GFX12-CU-NEXT: s_wait_samplecnt 0x0 15856; GFX12-CU-NEXT: s_wait_loadcnt 0x0 15857; GFX12-CU-NEXT: 
s_wait_storecnt 0x0 15858; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 15859; GFX12-CU-NEXT: s_wait_storecnt 0x0 15860; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 15861; GFX12-CU-NEXT: s_endpgm 15862 ptr addrspace(1) %out, i32 %in, i32 %old) { 15863entry: 15864 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 15865 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst 15866 ret void 15867} 15868
; Kernel global_system_one_as_acquire_seq_cst_cmpxchg: issues a volatile i32
; cmpxchg with syncscope("one-as"), success ordering acquire and failure
; ordering seq_cst, on the addrspace(1) pointer %out advanced by 4 i32 elements
; (the 16-byte offset visible in the expected instructions below). It compares
; against %old and stores %in; the result %val is unused.
; The prefixed comment lines inside the definition pin the llc -O0 output for
; the run configuration that uses the same prefix. The file header states that
; they are autogenerated by utils/update_llc_test_checks.py, so regenerate them
; with that script instead of editing them by hand.
15869define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg( 15870; GFX6-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: 15871; GFX6: ; %bb.0: ; %entry 15872; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 15873; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15874; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 15875; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 15876; GFX6-NEXT: s_waitcnt lgkmcnt(0) 15877; GFX6-NEXT: s_mov_b32 s12, s5 15878; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 15879; GFX6-NEXT: s_mov_b32 s10, 0x100f000 15880; GFX6-NEXT: s_mov_b32 s11, -1 15881; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 15882; GFX6-NEXT: s_mov_b32 s5, s12 15883; GFX6-NEXT: s_mov_b32 s6, s11 15884; GFX6-NEXT: s_mov_b32 s7, s10 15885; GFX6-NEXT: v_mov_b32_e32 v0, s9 15886; GFX6-NEXT: v_mov_b32_e32 v2, s8 15887; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15888; GFX6-NEXT: v_mov_b32_e32 v1, v2 15889; GFX6-NEXT: s_waitcnt vmcnt(0) 15890; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 15891; GFX6-NEXT: s_waitcnt vmcnt(0) 15892; GFX6-NEXT: buffer_wbinvl1 15893; GFX6-NEXT: s_endpgm 15894; 15895; GFX7-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: 15896; GFX7: ; %bb.0: ; %entry 15897; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15898; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15899; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15900; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15901; GFX7-NEXT: s_mov_b64 
s[10:11], 16 15902; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15903; GFX7-NEXT: s_mov_b32 s4, s8 15904; GFX7-NEXT: s_mov_b32 s5, s9 15905; GFX7-NEXT: s_mov_b32 s9, s10 15906; GFX7-NEXT: s_mov_b32 s8, s11 15907; GFX7-NEXT: s_add_u32 s4, s4, s9 15908; GFX7-NEXT: s_addc_u32 s8, s5, s8 15909; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15910; GFX7-NEXT: s_mov_b32 s5, s8 15911; GFX7-NEXT: v_mov_b32_e32 v2, s7 15912; GFX7-NEXT: v_mov_b32_e32 v0, s6 15913; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15914; GFX7-NEXT: v_mov_b32_e32 v3, v0 15915; GFX7-NEXT: v_mov_b32_e32 v0, s4 15916; GFX7-NEXT: v_mov_b32_e32 v1, s5 15917; GFX7-NEXT: s_waitcnt vmcnt(0) 15918; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15919; GFX7-NEXT: s_waitcnt vmcnt(0) 15920; GFX7-NEXT: buffer_wbinvl1_vol 15921; GFX7-NEXT: s_endpgm 15922; 15923; GFX10-WGP-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: 15924; GFX10-WGP: ; %bb.0: ; %entry 15925; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 15926; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15927; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 15928; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 15929; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15930; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 15931; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 15932; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15933; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 15934; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 15935; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15936; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15937; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15938; GFX10-WGP-NEXT: buffer_gl1_inv 15939; GFX10-WGP-NEXT: buffer_gl0_inv 15940; GFX10-WGP-NEXT: s_endpgm 15941; 15942; GFX10-CU-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: 15943; GFX10-CU: ; %bb.0: ; %entry 15944; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 15945; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15946; GFX10-CU-NEXT: s_load_dword s7, 
s[8:9], 0x8 15947; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 15948; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15949; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 15950; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 15951; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15952; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 15953; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 15954; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15955; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15956; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15957; GFX10-CU-NEXT: buffer_gl1_inv 15958; GFX10-CU-NEXT: buffer_gl0_inv 15959; GFX10-CU-NEXT: s_endpgm 15960; 15961; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: 15962; SKIP-CACHE-INV: ; %bb.0: ; %entry 15963; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 15964; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 15965; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 15966; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 15967; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15968; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 15969; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 15970; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 15971; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 15972; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 15973; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 15974; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 15975; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 15976; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 15977; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 15978; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15979; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 15980; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15981; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 15982; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15983; SKIP-CACHE-INV-NEXT: s_endpgm 15984; 15985; GFX90A-NOTTGSPLIT-LABEL: 
global_system_one_as_acquire_seq_cst_cmpxchg: 15986; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15987; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15988; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15989; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15990; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15991; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15992; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15993; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15994; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15995; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15996; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 15997; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15998; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15999; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16000; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 16001; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 16002; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16003; 16004; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: 16005; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16006; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16007; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16008; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16009; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16010; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16011; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16012; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16013; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16014; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16015; GFX90A-TGSPLIT-NEXT: buffer_wbl2 16016; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16017; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 16018; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16019; GFX90A-TGSPLIT-NEXT: buffer_invl2 16020; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 16021; GFX90A-TGSPLIT-NEXT: s_endpgm 16022; 16023; GFX940-NOTTGSPLIT-LABEL: 
global_system_one_as_acquire_seq_cst_cmpxchg: 16024; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16025; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16026; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16027; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16028; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16029; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16030; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16031; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16032; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16033; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16034; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 16035; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16036; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 16037; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16038; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 16039; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16040; 16041; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: 16042; GFX940-TGSPLIT: ; %bb.0: ; %entry 16043; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16044; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16045; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16046; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16047; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16048; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16049; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16050; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16051; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16052; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 16053; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16054; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 16055; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16056; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 16057; GFX940-TGSPLIT-NEXT: s_endpgm 16058; 16059; GFX11-WGP-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: 16060; GFX11-WGP: ; 
%bb.0: ; %entry 16061; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 16062; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16063; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16064; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16065; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16066; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 16067; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 16068; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16069; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 16070; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16071; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16072; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16073; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16074; GFX11-WGP-NEXT: buffer_gl1_inv 16075; GFX11-WGP-NEXT: buffer_gl0_inv 16076; GFX11-WGP-NEXT: s_endpgm 16077; 16078; GFX11-CU-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: 16079; GFX11-CU: ; %bb.0: ; %entry 16080; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 16081; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16082; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16083; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16084; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16085; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 16086; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 16087; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16088; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 16089; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16090; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16091; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16092; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16093; GFX11-CU-NEXT: buffer_gl1_inv 16094; GFX11-CU-NEXT: buffer_gl0_inv 16095; GFX11-CU-NEXT: s_endpgm 16096; 16097; GFX12-WGP-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: 16098; GFX12-WGP: ; %bb.0: ; %entry 16099; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 16100; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16101; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16102; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16103; 
GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16104; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 16105; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 16106; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16107; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 16108; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 16109; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 16110; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 16111; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16112; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16113; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 16114; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16115; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 16116; GFX12-WGP-NEXT: s_endpgm 16117; 16118; GFX12-CU-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: 16119; GFX12-CU: ; %bb.0: ; %entry 16120; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 16121; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16122; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16123; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16124; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16125; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 16126; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 16127; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16128; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 16129; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 16130; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 16131; GFX12-CU-NEXT: s_wait_samplecnt 0x0 16132; GFX12-CU-NEXT: s_wait_loadcnt 0x0 16133; GFX12-CU-NEXT: s_wait_storecnt 0x0 16134; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 16135; GFX12-CU-NEXT: s_wait_storecnt 0x0 16136; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 16137; GFX12-CU-NEXT: s_endpgm 16138 ptr addrspace(1) %out, i32 %in, i32 %old) { 16139entry: 16140 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 16141 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst 16142 ret void 16143} 16144 16145define amdgpu_kernel void 
; Kernel global_system_one_as_release_seq_cst_cmpxchg: issues a volatile i32
; cmpxchg with syncscope("one-as"), success ordering release and failure
; ordering seq_cst, on the addrspace(1) pointer %out advanced by 4 i32 elements
; (the 16-byte offset visible in the expected instructions below). It compares
; against %old and stores %in; the result %val is unused.
; The prefixed comment lines inside the definition pin the llc -O0 output for
; the run configuration that uses the same prefix. The file header states that
; they are autogenerated by utils/update_llc_test_checks.py, so regenerate them
; with that script instead of editing them by hand.
@global_system_one_as_release_seq_cst_cmpxchg( 16146; GFX6-LABEL: global_system_one_as_release_seq_cst_cmpxchg: 16147; GFX6: ; %bb.0: ; %entry 16148; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 16149; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16150; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 16151; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 16152; GFX6-NEXT: s_waitcnt lgkmcnt(0) 16153; GFX6-NEXT: s_mov_b32 s12, s5 16154; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 16155; GFX6-NEXT: s_mov_b32 s10, 0x100f000 16156; GFX6-NEXT: s_mov_b32 s11, -1 16157; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 16158; GFX6-NEXT: s_mov_b32 s5, s12 16159; GFX6-NEXT: s_mov_b32 s6, s11 16160; GFX6-NEXT: s_mov_b32 s7, s10 16161; GFX6-NEXT: v_mov_b32_e32 v0, s9 16162; GFX6-NEXT: v_mov_b32_e32 v2, s8 16163; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16164; GFX6-NEXT: v_mov_b32_e32 v1, v2 16165; GFX6-NEXT: s_waitcnt vmcnt(0) 16166; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 16167; GFX6-NEXT: s_waitcnt vmcnt(0) 16168; GFX6-NEXT: buffer_wbinvl1 16169; GFX6-NEXT: s_endpgm 16170; 16171; GFX7-LABEL: global_system_one_as_release_seq_cst_cmpxchg: 16172; GFX7: ; %bb.0: ; %entry 16173; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 16174; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 16175; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 16176; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 16177; GFX7-NEXT: s_mov_b64 s[10:11], 16 16178; GFX7-NEXT: s_waitcnt lgkmcnt(0) 16179; GFX7-NEXT: s_mov_b32 s4, s8 16180; GFX7-NEXT: s_mov_b32 s5, s9 16181; GFX7-NEXT: s_mov_b32 s9, s10 16182; GFX7-NEXT: s_mov_b32 s8, s11 16183; GFX7-NEXT: s_add_u32 s4, s4, s9 16184; GFX7-NEXT: s_addc_u32 s8, s5, s8 16185; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 16186; GFX7-NEXT: s_mov_b32 s5, s8 16187; GFX7-NEXT: v_mov_b32_e32 v2, s7 16188; GFX7-NEXT: v_mov_b32_e32 v0, s6 16189; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 
killed $exec 16190; GFX7-NEXT: v_mov_b32_e32 v3, v0 16191; GFX7-NEXT: v_mov_b32_e32 v0, s4 16192; GFX7-NEXT: v_mov_b32_e32 v1, s5 16193; GFX7-NEXT: s_waitcnt vmcnt(0) 16194; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16195; GFX7-NEXT: s_waitcnt vmcnt(0) 16196; GFX7-NEXT: buffer_wbinvl1_vol 16197; GFX7-NEXT: s_endpgm 16198; 16199; GFX10-WGP-LABEL: global_system_one_as_release_seq_cst_cmpxchg: 16200; GFX10-WGP: ; %bb.0: ; %entry 16201; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 16202; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16203; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 16204; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 16205; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 16206; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 16207; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 16208; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16209; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 16210; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 16211; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16212; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 16213; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16214; GFX10-WGP-NEXT: buffer_gl1_inv 16215; GFX10-WGP-NEXT: buffer_gl0_inv 16216; GFX10-WGP-NEXT: s_endpgm 16217; 16218; GFX10-CU-LABEL: global_system_one_as_release_seq_cst_cmpxchg: 16219; GFX10-CU: ; %bb.0: ; %entry 16220; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 16221; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16222; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 16223; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 16224; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 16225; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 16226; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 16227; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16228; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 16229; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 16230; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16231; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 16232; GFX10-CU-NEXT: s_waitcnt_vscnt null, 
0x0 16233; GFX10-CU-NEXT: buffer_gl1_inv 16234; GFX10-CU-NEXT: buffer_gl0_inv 16235; GFX10-CU-NEXT: s_endpgm 16236; 16237; SKIP-CACHE-INV-LABEL: global_system_one_as_release_seq_cst_cmpxchg: 16238; SKIP-CACHE-INV: ; %bb.0: ; %entry 16239; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 16240; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 16241; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 16242; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 16243; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 16244; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 16245; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 16246; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 16247; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 16248; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 16249; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 16250; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 16251; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 16252; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 16253; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 16254; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16255; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 16256; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16257; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 16258; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16259; SKIP-CACHE-INV-NEXT: s_endpgm 16260; 16261; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_seq_cst_cmpxchg: 16262; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16263; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16264; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16265; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16266; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16267; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16268; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16269; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16270; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def 
$vgpr2_vgpr3 killed $exec 16271; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16272; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 16273; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16274; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 16275; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16276; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 16277; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 16278; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16279; 16280; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_seq_cst_cmpxchg: 16281; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16282; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16283; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16284; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16285; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16286; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16287; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16288; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16289; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16290; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16291; GFX90A-TGSPLIT-NEXT: buffer_wbl2 16292; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16293; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 16294; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16295; GFX90A-TGSPLIT-NEXT: buffer_invl2 16296; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 16297; GFX90A-TGSPLIT-NEXT: s_endpgm 16298; 16299; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_seq_cst_cmpxchg: 16300; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16301; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16302; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16303; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16304; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16305; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16306; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16307; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16308; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def 
$vgpr2_vgpr3 killed $exec 16309; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16310; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 16311; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16312; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 16313; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16314; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 16315; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16316; 16317; GFX940-TGSPLIT-LABEL: global_system_one_as_release_seq_cst_cmpxchg: 16318; GFX940-TGSPLIT: ; %bb.0: ; %entry 16319; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16320; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16321; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16322; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16323; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16324; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16325; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16326; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16327; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16328; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 16329; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16330; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 16331; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16332; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 16333; GFX940-TGSPLIT-NEXT: s_endpgm 16334; 16335; GFX11-WGP-LABEL: global_system_one_as_release_seq_cst_cmpxchg: 16336; GFX11-WGP: ; %bb.0: ; %entry 16337; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 16338; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16339; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16340; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16341; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16342; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 16343; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 16344; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16345; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 16346; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16347; GFX11-WGP-NEXT: 
s_waitcnt_vscnt null, 0x0 16348; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16349; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16350; GFX11-WGP-NEXT: buffer_gl1_inv 16351; GFX11-WGP-NEXT: buffer_gl0_inv 16352; GFX11-WGP-NEXT: s_endpgm 16353; 16354; GFX11-CU-LABEL: global_system_one_as_release_seq_cst_cmpxchg: 16355; GFX11-CU: ; %bb.0: ; %entry 16356; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 16357; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16358; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16359; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16360; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16361; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 16362; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 16363; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16364; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 16365; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16366; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16367; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16368; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16369; GFX11-CU-NEXT: buffer_gl1_inv 16370; GFX11-CU-NEXT: buffer_gl0_inv 16371; GFX11-CU-NEXT: s_endpgm 16372; 16373; GFX12-WGP-LABEL: global_system_one_as_release_seq_cst_cmpxchg: 16374; GFX12-WGP: ; %bb.0: ; %entry 16375; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 16376; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16377; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16378; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16379; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16380; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 16381; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 16382; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16383; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 16384; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 16385; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 16386; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 16387; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16388; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16389; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] 
offset:16 scope:SCOPE_SYS 16390; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16391; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 16392; GFX12-WGP-NEXT: s_endpgm 16393; 16394; GFX12-CU-LABEL: global_system_one_as_release_seq_cst_cmpxchg: 16395; GFX12-CU: ; %bb.0: ; %entry 16396; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 16397; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16398; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16399; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16400; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16401; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 16402; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 16403; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16404; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 16405; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 16406; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 16407; GFX12-CU-NEXT: s_wait_samplecnt 0x0 16408; GFX12-CU-NEXT: s_wait_loadcnt 0x0 16409; GFX12-CU-NEXT: s_wait_storecnt 0x0 16410; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 16411; GFX12-CU-NEXT: s_wait_storecnt 0x0 16412; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 16413; GFX12-CU-NEXT: s_endpgm 16414 ptr addrspace(1) %out, i32 %in, i32 %old) { 16415entry: 16416 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 16417 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst 16418 ret void 16419} 16420 16421define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg( 16422; GFX6-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: 16423; GFX6: ; %bb.0: ; %entry 16424; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 16425; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16426; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 16427; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 16428; GFX6-NEXT: s_waitcnt lgkmcnt(0) 16429; GFX6-NEXT: s_mov_b32 s12, s5 16430; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 16431; GFX6-NEXT: s_mov_b32 s10, 0x100f000 16432; GFX6-NEXT: s_mov_b32 s11, -1 16433; 
GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 16434; GFX6-NEXT: s_mov_b32 s5, s12 16435; GFX6-NEXT: s_mov_b32 s6, s11 16436; GFX6-NEXT: s_mov_b32 s7, s10 16437; GFX6-NEXT: v_mov_b32_e32 v0, s9 16438; GFX6-NEXT: v_mov_b32_e32 v2, s8 16439; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16440; GFX6-NEXT: v_mov_b32_e32 v1, v2 16441; GFX6-NEXT: s_waitcnt vmcnt(0) 16442; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 16443; GFX6-NEXT: s_waitcnt vmcnt(0) 16444; GFX6-NEXT: buffer_wbinvl1 16445; GFX6-NEXT: s_endpgm 16446; 16447; GFX7-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: 16448; GFX7: ; %bb.0: ; %entry 16449; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 16450; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 16451; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 16452; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 16453; GFX7-NEXT: s_mov_b64 s[10:11], 16 16454; GFX7-NEXT: s_waitcnt lgkmcnt(0) 16455; GFX7-NEXT: s_mov_b32 s4, s8 16456; GFX7-NEXT: s_mov_b32 s5, s9 16457; GFX7-NEXT: s_mov_b32 s9, s10 16458; GFX7-NEXT: s_mov_b32 s8, s11 16459; GFX7-NEXT: s_add_u32 s4, s4, s9 16460; GFX7-NEXT: s_addc_u32 s8, s5, s8 16461; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 16462; GFX7-NEXT: s_mov_b32 s5, s8 16463; GFX7-NEXT: v_mov_b32_e32 v2, s7 16464; GFX7-NEXT: v_mov_b32_e32 v0, s6 16465; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16466; GFX7-NEXT: v_mov_b32_e32 v3, v0 16467; GFX7-NEXT: v_mov_b32_e32 v0, s4 16468; GFX7-NEXT: v_mov_b32_e32 v1, s5 16469; GFX7-NEXT: s_waitcnt vmcnt(0) 16470; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16471; GFX7-NEXT: s_waitcnt vmcnt(0) 16472; GFX7-NEXT: buffer_wbinvl1_vol 16473; GFX7-NEXT: s_endpgm 16474; 16475; GFX10-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: 16476; GFX10-WGP: ; %bb.0: ; %entry 16477; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 16478; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16479; GFX10-WGP-NEXT: s_load_dword 
s7, s[8:9], 0x8 16480; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 16481; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 16482; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 16483; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 16484; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16485; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 16486; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 16487; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16488; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 16489; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16490; GFX10-WGP-NEXT: buffer_gl1_inv 16491; GFX10-WGP-NEXT: buffer_gl0_inv 16492; GFX10-WGP-NEXT: s_endpgm 16493; 16494; GFX10-CU-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: 16495; GFX10-CU: ; %bb.0: ; %entry 16496; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 16497; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16498; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 16499; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 16500; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 16501; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 16502; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 16503; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16504; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 16505; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 16506; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16507; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 16508; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16509; GFX10-CU-NEXT: buffer_gl1_inv 16510; GFX10-CU-NEXT: buffer_gl0_inv 16511; GFX10-CU-NEXT: s_endpgm 16512; 16513; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: 16514; SKIP-CACHE-INV: ; %bb.0: ; %entry 16515; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 16516; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 16517; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 16518; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 16519; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 16520; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 16521; 
SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 16522; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 16523; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 16524; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 16525; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 16526; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 16527; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 16528; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 16529; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 16530; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16531; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 16532; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16533; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 16534; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16535; SKIP-CACHE-INV-NEXT: s_endpgm 16536; 16537; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: 16538; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16539; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16540; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16541; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16542; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16543; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16544; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16545; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16546; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16547; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16548; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 16549; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16550; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 16551; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16552; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 16553; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 16554; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16555; 16556; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: 16557; GFX90A-TGSPLIT: ; %bb.0: ; %entry 
16558; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16559; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16560; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16561; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16562; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16563; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16564; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16565; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16566; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16567; GFX90A-TGSPLIT-NEXT: buffer_wbl2 16568; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16569; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 16570; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16571; GFX90A-TGSPLIT-NEXT: buffer_invl2 16572; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 16573; GFX90A-TGSPLIT-NEXT: s_endpgm 16574; 16575; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: 16576; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16577; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16578; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16579; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16580; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16581; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16582; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16583; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16584; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16585; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16586; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 16587; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16588; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 16589; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16590; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 16591; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16592; 16593; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: 16594; GFX940-TGSPLIT: ; %bb.0: ; %entry 16595; GFX940-TGSPLIT-NEXT: 
v_mov_b32_e32 v0, 0 16596; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16597; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16598; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16599; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16600; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16601; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16602; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16603; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16604; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 16605; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16606; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 16607; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16608; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 16609; GFX940-TGSPLIT-NEXT: s_endpgm 16610; 16611; GFX11-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: 16612; GFX11-WGP: ; %bb.0: ; %entry 16613; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 16614; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16615; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16616; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16617; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16618; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 16619; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 16620; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16621; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 16622; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16623; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16624; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16625; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16626; GFX11-WGP-NEXT: buffer_gl1_inv 16627; GFX11-WGP-NEXT: buffer_gl0_inv 16628; GFX11-WGP-NEXT: s_endpgm 16629; 16630; GFX11-CU-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: 16631; GFX11-CU: ; %bb.0: ; %entry 16632; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 16633; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16634; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16635; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 
16636; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16637; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 16638; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 16639; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16640; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 16641; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16642; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16643; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 16644; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16645; GFX11-CU-NEXT: buffer_gl1_inv 16646; GFX11-CU-NEXT: buffer_gl0_inv 16647; GFX11-CU-NEXT: s_endpgm 16648; 16649; GFX12-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: 16650; GFX12-WGP: ; %bb.0: ; %entry 16651; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 16652; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16653; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16654; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16655; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16656; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 16657; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 16658; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16659; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 16660; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 16661; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 16662; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 16663; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16664; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16665; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS 16666; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16667; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 16668; GFX12-WGP-NEXT: s_endpgm 16669; 16670; GFX12-CU-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: 16671; GFX12-CU: ; %bb.0: ; %entry 16672; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 16673; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16674; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16675; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16676; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16677; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 16678; GFX12-CU-NEXT: 
v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst
  ret void
}

; Volatile cmpxchg on a global (addrspace(1)) pointer with
; syncscope("one-as"), seq_cst success ordering and seq_cst failure ordering.
; The result of the cmpxchg is not used by this kernel.
; The per-target assertions that follow are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg(
; GFX6-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s12, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: s_mov_b32 s10, 0x100f000
; GFX6-NEXT: s_mov_b32 s11, -1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s12
; GFX6-NEXT: s_mov_b32 s6, s11
; GFX6-NEXT: s_mov_b32 s7, s10
; GFX6-NEXT: v_mov_b32_e32 v0, s9
; GFX6-NEXT: v_mov_b32_e32 v2, s8
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: v_mov_b32_e32 v1, v2
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2
; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3
; GFX7-NEXT: s_mov_b64 s[10:11], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s4, s8
; GFX7-NEXT: s_mov_b32 s5, s9
; GFX7-NEXT: s_mov_b32 s9, s10
; GFX7-NEXT: s_mov_b32 s8, s11
; GFX7-NEXT: s_add_u32 s4, s4, s9
; GFX7-NEXT: s_addc_u32 s8, s5, s8
; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
; GFX7-NEXT: s_mov_b32 s5, s8
; GFX7-NEXT: v_mov_b32_e32 v2, s7
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6
; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6
; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of an i32 array based at %out, i.e. a 16-byte displacement
  ; (it shows up as offset:16, or as an explicit add of 16, in the checks).
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; %old is the expected value, %in is the replacement; %val is left unused.
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
  ret void
}

define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX6-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s12, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: s_mov_b32 s10, 0x100f000
; GFX6-NEXT: s_mov_b32 s11, -1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s12
; GFX6-NEXT: s_mov_b32 s6, s11
; GFX6-NEXT: s_mov_b32 s7, s10
; GFX6-NEXT: v_mov_b32_e32 v0, s9
; GFX6-NEXT: v_mov_b32_e32 v2, s8
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: v_mov_b32_e32 v1, v2
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT: s_mov_b64 s[12:13], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, s4
; GFX7-NEXT: s_mov_b32
s7, s5
; GFX7-NEXT: s_mov_b32 s11, s12
; GFX7-NEXT: s_mov_b32 s10, s13
; GFX7-NEXT: s_add_u32 s6, s6, s11
; GFX7-NEXT: s_addc_u32 s10, s7, s10
; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT: s_mov_b32 s7, s10
; GFX7-NEXT: v_mov_b32_e32 v2, s9
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6
; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6
; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of an i32 array based at %out, i.e. a 16-byte displacement.
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Volatile cmpxchg with syncscope("one-as"), monotonic on both the success
  ; and the failure path; %old is the expected value, %in the replacement.
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic
  ; Field 0 of the { i32, i1 } result is stored to %out, so the atomic's result
  ; is live here; every cmpswap in the checks above has a destination register.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX6-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s12, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: s_mov_b32 s10, 0x100f000
; GFX6-NEXT: s_mov_b32 s11, -1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s12
; GFX6-NEXT: s_mov_b32 s6, s11
; GFX6-NEXT: s_mov_b32 s7, s10
; GFX6-NEXT: v_mov_b32_e32 v0, s9
; GFX6-NEXT: v_mov_b32_e32 v2, s8
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: v_mov_b32_e32 v1, v2
17241; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 17242; GFX6-NEXT: s_waitcnt vmcnt(0) 17243; GFX6-NEXT: buffer_wbinvl1 17244; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 17245; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 17246; GFX6-NEXT: s_endpgm 17247; 17248; GFX7-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 17249; GFX7: ; %bb.0: ; %entry 17250; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 17251; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17252; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 17253; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 17254; GFX7-NEXT: s_mov_b64 s[12:13], 16 17255; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17256; GFX7-NEXT: s_mov_b32 s6, s4 17257; GFX7-NEXT: s_mov_b32 s7, s5 17258; GFX7-NEXT: s_mov_b32 s11, s12 17259; GFX7-NEXT: s_mov_b32 s10, s13 17260; GFX7-NEXT: s_add_u32 s6, s6, s11 17261; GFX7-NEXT: s_addc_u32 s10, s7, s10 17262; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 17263; GFX7-NEXT: s_mov_b32 s7, s10 17264; GFX7-NEXT: v_mov_b32_e32 v2, s9 17265; GFX7-NEXT: v_mov_b32_e32 v0, s8 17266; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17267; GFX7-NEXT: v_mov_b32_e32 v3, v0 17268; GFX7-NEXT: v_mov_b32_e32 v0, s6 17269; GFX7-NEXT: v_mov_b32_e32 v1, s7 17270; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 17271; GFX7-NEXT: s_waitcnt vmcnt(0) 17272; GFX7-NEXT: buffer_wbinvl1_vol 17273; GFX7-NEXT: v_mov_b32_e32 v0, s4 17274; GFX7-NEXT: v_mov_b32_e32 v1, s5 17275; GFX7-NEXT: flat_store_dword v[0:1], v2 17276; GFX7-NEXT: s_endpgm 17277; 17278; GFX10-WGP-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 17279; GFX10-WGP: ; %bb.0: ; %entry 17280; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 17281; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17282; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 17283; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 17284; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17285; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 
17286; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 17287; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17288; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 17289; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17290; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17291; GFX10-WGP-NEXT: buffer_gl1_inv 17292; GFX10-WGP-NEXT: buffer_gl0_inv 17293; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 17294; GFX10-WGP-NEXT: s_endpgm 17295; 17296; GFX10-CU-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 17297; GFX10-CU: ; %bb.0: ; %entry 17298; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 17299; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17300; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 17301; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 17302; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17303; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 17304; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 17305; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17306; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 17307; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17308; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17309; GFX10-CU-NEXT: buffer_gl1_inv 17310; GFX10-CU-NEXT: buffer_gl0_inv 17311; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 17312; GFX10-CU-NEXT: s_endpgm 17313; 17314; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 17315; SKIP-CACHE-INV: ; %bb.0: ; %entry 17316; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 17317; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 17318; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 17319; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 17320; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17321; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 17322; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 17323; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 17324; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 17325; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 
killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 17326; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 17327; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 17328; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 17329; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 17330; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 17331; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17332; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 17333; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 17334; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17335; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 17336; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17337; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 17338; SKIP-CACHE-INV-NEXT: s_endpgm 17339; 17340; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 17341; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17342; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17343; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17344; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17345; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17346; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17347; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17348; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17349; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17350; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17351; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17352; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17353; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 17354; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 17355; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17356; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17357; 17358; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 17359; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17360; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17361; 
GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17362; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17363; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17364; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17365; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17366; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17367; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17368; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17369; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17370; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17371; GFX90A-TGSPLIT-NEXT: buffer_invl2 17372; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 17373; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17374; GFX90A-TGSPLIT-NEXT: s_endpgm 17375; 17376; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 17377; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 17378; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17379; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17380; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17381; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17382; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17383; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17384; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17385; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17386; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17387; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 17388; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17389; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 17390; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17391; GFX940-NOTTGSPLIT-NEXT: s_endpgm 17392; 17393; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 17394; GFX940-TGSPLIT: ; %bb.0: ; %entry 17395; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17396; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 
17397; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17398; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17399; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17400; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17401; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17402; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17403; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17404; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 17405; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17406; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 17407; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17408; GFX940-TGSPLIT-NEXT: s_endpgm 17409; 17410; GFX11-WGP-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 17411; GFX11-WGP: ; %bb.0: ; %entry 17412; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 17413; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17414; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17415; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17416; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 17417; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 17418; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 17419; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17420; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 17421; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 17422; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17423; GFX11-WGP-NEXT: buffer_gl1_inv 17424; GFX11-WGP-NEXT: buffer_gl0_inv 17425; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 17426; GFX11-WGP-NEXT: s_endpgm 17427; 17428; GFX11-CU-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 17429; GFX11-CU: ; %bb.0: ; %entry 17430; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 17431; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17432; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17433; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17434; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 17435; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 17436; GFX11-CU-NEXT: v_mov_b32_e32 v3, 
s2 17437; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17438; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 17439; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 17440; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 17441; GFX11-CU-NEXT: buffer_gl1_inv 17442; GFX11-CU-NEXT: buffer_gl0_inv 17443; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 17444; GFX11-CU-NEXT: s_endpgm 17445; 17446; GFX12-WGP-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 17447; GFX12-WGP: ; %bb.0: ; %entry 17448; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 17449; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17450; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17451; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17452; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 17453; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 17454; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 17455; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17456; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 17457; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 17458; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 17459; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 17460; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 17461; GFX12-WGP-NEXT: s_endpgm 17462; 17463; GFX12-CU-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 17464; GFX12-CU: ; %bb.0: ; %entry 17465; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 17466; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17467; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17468; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17469; GFX12-CU-NEXT: s_wait_kmcnt 0x0 17470; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 17471; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 17472; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17473; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 17474; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 17475; GFX12-CU-NEXT: 
s_wait_loadcnt 0x0 17476; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 17477; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 17478; GFX12-CU-NEXT: s_endpgm 17479 ptr addrspace(1) %out, i32 %in, i32 %old) { 17480entry: 17481 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 17482 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic 17483 %val0 = extractvalue { i32, i1 } %val, 0 17484 store i32 %val0, ptr addrspace(1) %out, align 4 17485 ret void 17486} 17487 17488define amdgpu_kernel void @global_system_one_as_release_monotonic_ret_cmpxchg( 17489; GFX6-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: 17490; GFX6: ; %bb.0: ; %entry 17491; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 17492; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17493; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 17494; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 17495; GFX6-NEXT: s_waitcnt lgkmcnt(0) 17496; GFX6-NEXT: s_mov_b32 s12, s5 17497; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 17498; GFX6-NEXT: s_mov_b32 s10, 0x100f000 17499; GFX6-NEXT: s_mov_b32 s11, -1 17500; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 17501; GFX6-NEXT: s_mov_b32 s5, s12 17502; GFX6-NEXT: s_mov_b32 s6, s11 17503; GFX6-NEXT: s_mov_b32 s7, s10 17504; GFX6-NEXT: v_mov_b32_e32 v0, s9 17505; GFX6-NEXT: v_mov_b32_e32 v2, s8 17506; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17507; GFX6-NEXT: v_mov_b32_e32 v1, v2 17508; GFX6-NEXT: s_waitcnt vmcnt(0) 17509; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 17510; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 17511; GFX6-NEXT: s_waitcnt vmcnt(0) 17512; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 17513; GFX6-NEXT: s_endpgm 17514; 17515; GFX7-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: 17516; GFX7: ; %bb.0: ; %entry 17517; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 17518; GFX7-NEXT: 
s_load_dwordx2 s[4:5], s[6:7], 0x0 17519; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 17520; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 17521; GFX7-NEXT: s_mov_b64 s[12:13], 16 17522; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17523; GFX7-NEXT: s_mov_b32 s6, s4 17524; GFX7-NEXT: s_mov_b32 s7, s5 17525; GFX7-NEXT: s_mov_b32 s11, s12 17526; GFX7-NEXT: s_mov_b32 s10, s13 17527; GFX7-NEXT: s_add_u32 s6, s6, s11 17528; GFX7-NEXT: s_addc_u32 s10, s7, s10 17529; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 17530; GFX7-NEXT: s_mov_b32 s7, s10 17531; GFX7-NEXT: v_mov_b32_e32 v2, s9 17532; GFX7-NEXT: v_mov_b32_e32 v0, s8 17533; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17534; GFX7-NEXT: v_mov_b32_e32 v3, v0 17535; GFX7-NEXT: v_mov_b32_e32 v0, s6 17536; GFX7-NEXT: v_mov_b32_e32 v1, s7 17537; GFX7-NEXT: s_waitcnt vmcnt(0) 17538; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 17539; GFX7-NEXT: v_mov_b32_e32 v0, s4 17540; GFX7-NEXT: v_mov_b32_e32 v1, s5 17541; GFX7-NEXT: s_waitcnt vmcnt(0) 17542; GFX7-NEXT: flat_store_dword v[0:1], v2 17543; GFX7-NEXT: s_endpgm 17544; 17545; GFX10-WGP-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: 17546; GFX10-WGP: ; %bb.0: ; %entry 17547; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 17548; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17549; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 17550; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 17551; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17552; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 17553; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 17554; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17555; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 17556; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17557; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17558; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17559; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17560; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 17561; GFX10-WGP-NEXT: s_endpgm 17562; 
17563; GFX10-CU-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: 17564; GFX10-CU: ; %bb.0: ; %entry 17565; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 17566; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17567; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 17568; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 17569; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17570; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 17571; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 17572; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17573; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 17574; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17575; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17576; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17577; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17578; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 17579; GFX10-CU-NEXT: s_endpgm 17580; 17581; SKIP-CACHE-INV-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: 17582; SKIP-CACHE-INV: ; %bb.0: ; %entry 17583; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 17584; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 17585; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 17586; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 17587; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17588; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 17589; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 17590; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 17591; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 17592; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 17593; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 17594; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 17595; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 17596; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 17597; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 17598; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17599; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 17600; SKIP-CACHE-INV-NEXT: s_waitcnt 
vmcnt(0) 17601; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 17602; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 17603; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17604; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 17605; SKIP-CACHE-INV-NEXT: s_endpgm 17606; 17607; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: 17608; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17609; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17610; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17611; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17612; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17613; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17614; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17615; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17616; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17617; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17618; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 17619; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17620; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17621; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17622; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17623; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17624; 17625; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: 17626; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17627; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17628; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17629; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17630; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17631; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17632; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17633; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17634; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17635; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17636; 
GFX90A-TGSPLIT-NEXT: buffer_wbl2 17637; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17638; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17639; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17640; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17641; GFX90A-TGSPLIT-NEXT: s_endpgm 17642; 17643; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: 17644; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 17645; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17646; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17647; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17648; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17649; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17650; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17651; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17652; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17653; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17654; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 17655; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17656; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 17657; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17658; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17659; GFX940-NOTTGSPLIT-NEXT: s_endpgm 17660; 17661; GFX940-TGSPLIT-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: 17662; GFX940-TGSPLIT: ; %bb.0: ; %entry 17663; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17664; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17665; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17666; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17667; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17668; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17669; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17670; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17671; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17672; 
GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 17673; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17674; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 17675; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17676; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17677; GFX940-TGSPLIT-NEXT: s_endpgm 17678; 17679; GFX11-WGP-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: 17680; GFX11-WGP: ; %bb.0: ; %entry 17681; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 17682; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17683; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17684; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17685; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 17686; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 17687; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 17688; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17689; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 17690; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17691; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17692; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 17693; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17694; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 17695; GFX11-WGP-NEXT: s_endpgm 17696; 17697; GFX11-CU-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: 17698; GFX11-CU: ; %bb.0: ; %entry 17699; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 17700; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17701; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17702; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17703; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 17704; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 17705; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 17706; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17707; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 17708; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 17709; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 17710; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 17711; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 
17712; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 17713; GFX11-CU-NEXT: s_endpgm 17714; 17715; GFX12-WGP-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: 17716; GFX12-WGP: ; %bb.0: ; %entry 17717; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 17718; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17719; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17720; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17721; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 17722; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 17723; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 17724; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17725; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 17726; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 17727; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 17728; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 17729; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 17730; GFX12-WGP-NEXT: s_wait_storecnt 0x0 17731; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 17732; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 17733; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 17734; GFX12-WGP-NEXT: s_endpgm 17735; 17736; GFX12-CU-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: 17737; GFX12-CU: ; %bb.0: ; %entry 17738; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 17739; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17740; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17741; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17742; GFX12-CU-NEXT: s_wait_kmcnt 0x0 17743; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 17744; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 17745; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17746; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 17747; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 17748; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 17749; GFX12-CU-NEXT: s_wait_samplecnt 0x0 17750; GFX12-CU-NEXT: s_wait_loadcnt 0x0 17751; GFX12-CU-NEXT: s_wait_storecnt 0x0 17752; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 
th:TH_ATOMIC_RETURN scope:SCOPE_SYS 17753; GFX12-CU-NEXT: s_wait_loadcnt 0x0 17754; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 17755; GFX12-CU-NEXT: s_endpgm 17756 ptr addrspace(1) %out, i32 %in, i32 %old) { 17757entry: 17758 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 17759 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release monotonic 17760 %val0 = extractvalue { i32, i1 } %val, 0 17761 store i32 %val0, ptr addrspace(1) %out, align 4 17762 ret void 17763} 17764 17765define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( 17766; GFX6-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 17767; GFX6: ; %bb.0: ; %entry 17768; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 17769; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17770; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 17771; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 17772; GFX6-NEXT: s_waitcnt lgkmcnt(0) 17773; GFX6-NEXT: s_mov_b32 s12, s5 17774; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 17775; GFX6-NEXT: s_mov_b32 s10, 0x100f000 17776; GFX6-NEXT: s_mov_b32 s11, -1 17777; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 17778; GFX6-NEXT: s_mov_b32 s5, s12 17779; GFX6-NEXT: s_mov_b32 s6, s11 17780; GFX6-NEXT: s_mov_b32 s7, s10 17781; GFX6-NEXT: v_mov_b32_e32 v0, s9 17782; GFX6-NEXT: v_mov_b32_e32 v2, s8 17783; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17784; GFX6-NEXT: v_mov_b32_e32 v1, v2 17785; GFX6-NEXT: s_waitcnt vmcnt(0) 17786; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 17787; GFX6-NEXT: s_waitcnt vmcnt(0) 17788; GFX6-NEXT: buffer_wbinvl1 17789; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 17790; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 17791; GFX6-NEXT: s_endpgm 17792; 17793; GFX7-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 17794; GFX7: ; %bb.0: ; %entry 17795; GFX7-NEXT: s_mov_b64 
s[6:7], s[8:9] 17796; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17797; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 17798; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 17799; GFX7-NEXT: s_mov_b64 s[12:13], 16 17800; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17801; GFX7-NEXT: s_mov_b32 s6, s4 17802; GFX7-NEXT: s_mov_b32 s7, s5 17803; GFX7-NEXT: s_mov_b32 s11, s12 17804; GFX7-NEXT: s_mov_b32 s10, s13 17805; GFX7-NEXT: s_add_u32 s6, s6, s11 17806; GFX7-NEXT: s_addc_u32 s10, s7, s10 17807; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 17808; GFX7-NEXT: s_mov_b32 s7, s10 17809; GFX7-NEXT: v_mov_b32_e32 v2, s9 17810; GFX7-NEXT: v_mov_b32_e32 v0, s8 17811; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17812; GFX7-NEXT: v_mov_b32_e32 v3, v0 17813; GFX7-NEXT: v_mov_b32_e32 v0, s6 17814; GFX7-NEXT: v_mov_b32_e32 v1, s7 17815; GFX7-NEXT: s_waitcnt vmcnt(0) 17816; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 17817; GFX7-NEXT: s_waitcnt vmcnt(0) 17818; GFX7-NEXT: buffer_wbinvl1_vol 17819; GFX7-NEXT: v_mov_b32_e32 v0, s4 17820; GFX7-NEXT: v_mov_b32_e32 v1, s5 17821; GFX7-NEXT: flat_store_dword v[0:1], v2 17822; GFX7-NEXT: s_endpgm 17823; 17824; GFX10-WGP-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 17825; GFX10-WGP: ; %bb.0: ; %entry 17826; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 17827; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17828; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 17829; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 17830; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17831; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 17832; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 17833; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17834; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 17835; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17836; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17837; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17838; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17839; GFX10-WGP-NEXT: 
buffer_gl1_inv 17840; GFX10-WGP-NEXT: buffer_gl0_inv 17841; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 17842; GFX10-WGP-NEXT: s_endpgm 17843; 17844; GFX10-CU-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 17845; GFX10-CU: ; %bb.0: ; %entry 17846; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 17847; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17848; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 17849; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 17850; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17851; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 17852; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 17853; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17854; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 17855; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17856; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17857; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17858; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17859; GFX10-CU-NEXT: buffer_gl1_inv 17860; GFX10-CU-NEXT: buffer_gl0_inv 17861; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 17862; GFX10-CU-NEXT: s_endpgm 17863; 17864; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 17865; SKIP-CACHE-INV: ; %bb.0: ; %entry 17866; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 17867; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 17868; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 17869; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 17870; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17871; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 17872; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 17873; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 17874; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 17875; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 17876; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 17877; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 17878; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 17879; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 17880; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 17881; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17882; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 17883; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17884; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 17885; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17886; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 17887; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17888; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 17889; SKIP-CACHE-INV-NEXT: s_endpgm 17890; 17891; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 17892; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17893; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17894; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17895; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17896; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17897; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17898; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17899; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17900; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17901; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17902; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 17903; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17904; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17905; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17906; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 17907; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 17908; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17909; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17910; 17911; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 17912; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17913; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17914; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17915; GFX90A-TGSPLIT-NEXT: s_load_dword s7, 
s[8:9], 0x8 17916; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17917; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17918; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17919; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17920; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17921; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17922; GFX90A-TGSPLIT-NEXT: buffer_wbl2 17923; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17924; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17925; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17926; GFX90A-TGSPLIT-NEXT: buffer_invl2 17927; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 17928; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17929; GFX90A-TGSPLIT-NEXT: s_endpgm 17930; 17931; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 17932; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 17933; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17934; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17935; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17936; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17937; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17938; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17939; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17940; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17941; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17942; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 17943; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17944; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 17945; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17946; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 17947; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17948; GFX940-NOTTGSPLIT-NEXT: s_endpgm 17949; 17950; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 17951; GFX940-TGSPLIT: ; %bb.0: ; %entry 17952; 
GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17953; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17954; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17955; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17956; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17957; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17958; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17959; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17960; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17961; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 17962; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17963; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 17964; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17965; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 17966; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17967; GFX940-TGSPLIT-NEXT: s_endpgm 17968; 17969; GFX11-WGP-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 17970; GFX11-WGP: ; %bb.0: ; %entry 17971; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 17972; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17973; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17974; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17975; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 17976; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 17977; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 17978; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17979; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 17980; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17981; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17982; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 17983; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17984; GFX11-WGP-NEXT: buffer_gl1_inv 17985; GFX11-WGP-NEXT: buffer_gl0_inv 17986; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 17987; GFX11-WGP-NEXT: s_endpgm 17988; 17989; GFX11-CU-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 17990; GFX11-CU: ; %bb.0: ; %entry 17991; GFX11-CU-NEXT: 
v_mov_b32_e32 v0, 0 17992; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17993; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17994; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17995; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 17996; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 17997; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 17998; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17999; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 18000; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18001; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 18002; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18003; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18004; GFX11-CU-NEXT: buffer_gl1_inv 18005; GFX11-CU-NEXT: buffer_gl0_inv 18006; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18007; GFX11-CU-NEXT: s_endpgm 18008; 18009; GFX12-WGP-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 18010; GFX12-WGP: ; %bb.0: ; %entry 18011; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 18012; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18013; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18014; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18015; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 18016; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 18017; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 18018; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18019; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 18020; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 18021; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 18022; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 18023; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18024; GFX12-WGP-NEXT: s_wait_storecnt 0x0 18025; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 18026; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 18027; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 18028; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18029; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 18030; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18031; GFX12-WGP-NEXT: s_endpgm 18032; 18033; 
GFX12-CU-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 18034; GFX12-CU: ; %bb.0: ; %entry 18035; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 18036; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18037; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18038; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18039; GFX12-CU-NEXT: s_wait_kmcnt 0x0 18040; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 18041; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 18042; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18043; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 18044; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 18045; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 18046; GFX12-CU-NEXT: s_wait_samplecnt 0x0 18047; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18048; GFX12-CU-NEXT: s_wait_storecnt 0x0 18049; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 18050; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 18051; GFX12-CU-NEXT: s_wait_samplecnt 0x0 18052; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18053; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 18054; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18055; GFX12-CU-NEXT: s_endpgm 18056 ptr addrspace(1) %out, i32 %in, i32 %old) { 18057entry: 18058 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 18059 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic 18060 %val0 = extractvalue { i32, i1 } %val, 0 18061 store i32 %val0, ptr addrspace(1) %out, align 4 18062 ret void 18063} 18064; Test kernel: volatile cmpxchg on a ptr addrspace(1) location with syncscope("one-as"), success ordering seq_cst and failure ordering monotonic; the i32 result is stored back to %out. The per-target assertions below are autogenerated (see the NOTE at the top of the file) and should be regenerated with utils/update_llc_test_checks.py rather than edited by hand. 18065define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( 18066; GFX6-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 18067; GFX6: ; %bb.0: ; %entry 18068; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 18069; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18070; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 18071; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 18072; GFX6-NEXT: s_waitcnt lgkmcnt(0) 18073; GFX6-NEXT: s_mov_b32 s12, s5 18074; GFX6-NEXT: ; kill: def $sgpr4 killed
$sgpr4 killed $sgpr4_sgpr5 18075; GFX6-NEXT: s_mov_b32 s10, 0x100f000 18076; GFX6-NEXT: s_mov_b32 s11, -1 18077; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 18078; GFX6-NEXT: s_mov_b32 s5, s12 18079; GFX6-NEXT: s_mov_b32 s6, s11 18080; GFX6-NEXT: s_mov_b32 s7, s10 18081; GFX6-NEXT: v_mov_b32_e32 v0, s9 18082; GFX6-NEXT: v_mov_b32_e32 v2, s8 18083; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18084; GFX6-NEXT: v_mov_b32_e32 v1, v2 18085; GFX6-NEXT: s_waitcnt vmcnt(0) 18086; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 18087; GFX6-NEXT: s_waitcnt vmcnt(0) 18088; GFX6-NEXT: buffer_wbinvl1 18089; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18090; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 18091; GFX6-NEXT: s_endpgm 18092; 18093; GFX7-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 18094; GFX7: ; %bb.0: ; %entry 18095; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 18096; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18097; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 18098; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 18099; GFX7-NEXT: s_mov_b64 s[12:13], 16 18100; GFX7-NEXT: s_waitcnt lgkmcnt(0) 18101; GFX7-NEXT: s_mov_b32 s6, s4 18102; GFX7-NEXT: s_mov_b32 s7, s5 18103; GFX7-NEXT: s_mov_b32 s11, s12 18104; GFX7-NEXT: s_mov_b32 s10, s13 18105; GFX7-NEXT: s_add_u32 s6, s6, s11 18106; GFX7-NEXT: s_addc_u32 s10, s7, s10 18107; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 18108; GFX7-NEXT: s_mov_b32 s7, s10 18109; GFX7-NEXT: v_mov_b32_e32 v2, s9 18110; GFX7-NEXT: v_mov_b32_e32 v0, s8 18111; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18112; GFX7-NEXT: v_mov_b32_e32 v3, v0 18113; GFX7-NEXT: v_mov_b32_e32 v0, s6 18114; GFX7-NEXT: v_mov_b32_e32 v1, s7 18115; GFX7-NEXT: s_waitcnt vmcnt(0) 18116; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 18117; GFX7-NEXT: s_waitcnt vmcnt(0) 18118; GFX7-NEXT: buffer_wbinvl1_vol
18119; GFX7-NEXT: v_mov_b32_e32 v0, s4 18120; GFX7-NEXT: v_mov_b32_e32 v1, s5 18121; GFX7-NEXT: flat_store_dword v[0:1], v2 18122; GFX7-NEXT: s_endpgm 18123; 18124; GFX10-WGP-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 18125; GFX10-WGP: ; %bb.0: ; %entry 18126; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 18127; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18128; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 18129; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 18130; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 18131; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 18132; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 18133; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18134; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 18135; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18136; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 18137; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18138; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18139; GFX10-WGP-NEXT: buffer_gl1_inv 18140; GFX10-WGP-NEXT: buffer_gl0_inv 18141; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 18142; GFX10-WGP-NEXT: s_endpgm 18143; 18144; GFX10-CU-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 18145; GFX10-CU: ; %bb.0: ; %entry 18146; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 18147; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18148; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 18149; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 18150; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 18151; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 18152; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 18153; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18154; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 18155; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18156; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 18157; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18158; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18159; GFX10-CU-NEXT: buffer_gl1_inv 18160; GFX10-CU-NEXT: buffer_gl0_inv 18161; GFX10-CU-NEXT:
global_store_dword v0, v1, s[4:5] 18162; GFX10-CU-NEXT: s_endpgm 18163; 18164; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 18165; SKIP-CACHE-INV: ; %bb.0: ; %entry 18166; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 18167; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 18168; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 18169; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 18170; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 18171; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 18172; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 18173; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 18174; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 18175; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 18176; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 18177; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 18178; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 18179; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 18180; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 18181; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18182; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 18183; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18184; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 18185; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18186; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18187; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18188; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 18189; SKIP-CACHE-INV-NEXT: s_endpgm 18190; 18191; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 18192; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 18193; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18194; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18195; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18196; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18197; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt
lgkmcnt(0) 18198; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18199; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18200; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18201; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18202; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 18203; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18204; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18205; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18206; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 18207; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 18208; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18209; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 18210; 18211; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 18212; GFX90A-TGSPLIT: ; %bb.0: ; %entry 18213; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18214; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18215; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18216; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18217; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18218; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18219; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18220; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18221; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18222; GFX90A-TGSPLIT-NEXT: buffer_wbl2 18223; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18224; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18225; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18226; GFX90A-TGSPLIT-NEXT: buffer_invl2 18227; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 18228; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18229; GFX90A-TGSPLIT-NEXT: s_endpgm 18230; 18231; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 18232; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 18233; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18234; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
18235; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18236; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18237; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18238; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18239; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18240; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18241; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18242; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 18243; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18244; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 18245; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18246; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 18247; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18248; GFX940-NOTTGSPLIT-NEXT: s_endpgm 18249; 18250; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 18251; GFX940-TGSPLIT: ; %bb.0: ; %entry 18252; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18253; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18254; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18255; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18256; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18257; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18258; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18259; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18260; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18261; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 18262; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18263; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 18264; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18265; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 18266; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18267; GFX940-TGSPLIT-NEXT: s_endpgm 18268; 18269; GFX11-WGP-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 18270; GFX11-WGP: ; %bb.0: ; %entry 18271; GFX11-WGP-NEXT:
v_mov_b32_e32 v0, 0 18272; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18273; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18274; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18275; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 18276; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 18277; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 18278; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18279; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 18280; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 18281; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 18282; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18283; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 18284; GFX11-WGP-NEXT: buffer_gl1_inv 18285; GFX11-WGP-NEXT: buffer_gl0_inv 18286; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18287; GFX11-WGP-NEXT: s_endpgm 18288; 18289; GFX11-CU-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 18290; GFX11-CU: ; %bb.0: ; %entry 18291; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 18292; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18293; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18294; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18295; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 18296; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 18297; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 18298; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18299; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 18300; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18301; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 18302; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18303; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18304; GFX11-CU-NEXT: buffer_gl1_inv 18305; GFX11-CU-NEXT: buffer_gl0_inv 18306; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18307; GFX11-CU-NEXT: s_endpgm 18308; 18309; GFX12-WGP-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 18310; GFX12-WGP: ; %bb.0: ; %entry 18311; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 18312; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18313; GFX12-WGP-NEXT:
s_load_b32 s3, s[4:5], 0x8 18314; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18315; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 18316; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 18317; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 18318; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18319; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 18320; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 18321; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 18322; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 18323; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18324; GFX12-WGP-NEXT: s_wait_storecnt 0x0 18325; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 18326; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 18327; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 18328; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18329; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 18330; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18331; GFX12-WGP-NEXT: s_endpgm 18332; 18333; GFX12-CU-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 18334; GFX12-CU: ; %bb.0: ; %entry 18335; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 18336; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18337; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18338; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18339; GFX12-CU-NEXT: s_wait_kmcnt 0x0 18340; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 18341; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 18342; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18343; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 18344; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 18345; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 18346; GFX12-CU-NEXT: s_wait_samplecnt 0x0 18347; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18348; GFX12-CU-NEXT: s_wait_storecnt 0x0 18349; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 18350; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 18351; GFX12-CU-NEXT: s_wait_samplecnt 0x0 18352; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18353; GFX12-CU-NEXT: global_inv
scope:SCOPE_SYS 18354; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18355; GFX12-CU-NEXT: s_endpgm 18356 ptr addrspace(1) %out, i32 %in, i32 %old) { 18357entry: 18358 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 ; i32 element 4 of %out, matching the 16-byte offset in the expected ISA 18359 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic ; success ordering seq_cst, failure ordering monotonic 18360 %val0 = extractvalue { i32, i1 } %val, 0 ; keep only the i32 member of the { i32, i1 } result 18361 store i32 %val0, ptr addrspace(1) %out, align 4 18362 ret void 18363} 18364 18365define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg( 18366; GFX6-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: 18367; GFX6: ; %bb.0: ; %entry 18368; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 18369; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18370; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 18371; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 18372; GFX6-NEXT: s_waitcnt lgkmcnt(0) 18373; GFX6-NEXT: s_mov_b32 s12, s5 18374; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 18375; GFX6-NEXT: s_mov_b32 s10, 0x100f000 18376; GFX6-NEXT: s_mov_b32 s11, -1 18377; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 18378; GFX6-NEXT: s_mov_b32 s5, s12 18379; GFX6-NEXT: s_mov_b32 s6, s11 18380; GFX6-NEXT: s_mov_b32 s7, s10 18381; GFX6-NEXT: v_mov_b32_e32 v0, s9 18382; GFX6-NEXT: v_mov_b32_e32 v2, s8 18383; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18384; GFX6-NEXT: v_mov_b32_e32 v1, v2 18385; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 18386; GFX6-NEXT: s_waitcnt vmcnt(0) 18387; GFX6-NEXT: buffer_wbinvl1 18388; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18389; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 18390; GFX6-NEXT: s_endpgm 18391; 18392; GFX7-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: 18393; GFX7: ; %bb.0: ; %entry 18394; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 18395; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18396; GFX7-NEXT: s_load_dword
s9, s[6:7], 0x2 18397; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 18398; GFX7-NEXT: s_mov_b64 s[12:13], 16 18399; GFX7-NEXT: s_waitcnt lgkmcnt(0) 18400; GFX7-NEXT: s_mov_b32 s6, s4 18401; GFX7-NEXT: s_mov_b32 s7, s5 18402; GFX7-NEXT: s_mov_b32 s11, s12 18403; GFX7-NEXT: s_mov_b32 s10, s13 18404; GFX7-NEXT: s_add_u32 s6, s6, s11 18405; GFX7-NEXT: s_addc_u32 s10, s7, s10 18406; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 18407; GFX7-NEXT: s_mov_b32 s7, s10 18408; GFX7-NEXT: v_mov_b32_e32 v2, s9 18409; GFX7-NEXT: v_mov_b32_e32 v0, s8 18410; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18411; GFX7-NEXT: v_mov_b32_e32 v3, v0 18412; GFX7-NEXT: v_mov_b32_e32 v0, s6 18413; GFX7-NEXT: v_mov_b32_e32 v1, s7 18414; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 18415; GFX7-NEXT: s_waitcnt vmcnt(0) 18416; GFX7-NEXT: buffer_wbinvl1_vol 18417; GFX7-NEXT: v_mov_b32_e32 v0, s4 18418; GFX7-NEXT: v_mov_b32_e32 v1, s5 18419; GFX7-NEXT: flat_store_dword v[0:1], v2 18420; GFX7-NEXT: s_endpgm 18421; 18422; GFX10-WGP-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: 18423; GFX10-WGP: ; %bb.0: ; %entry 18424; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 18425; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18426; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 18427; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 18428; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 18429; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 18430; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 18431; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18432; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 18433; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18434; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18435; GFX10-WGP-NEXT: buffer_gl1_inv 18436; GFX10-WGP-NEXT: buffer_gl0_inv 18437; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 18438; GFX10-WGP-NEXT: s_endpgm 18439; 18440; GFX10-CU-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: 18441; 
GFX10-CU: ; %bb.0: ; %entry 18442; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 18443; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18444; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 18445; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 18446; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 18447; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 18448; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 18449; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18450; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 18451; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18452; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18453; GFX10-CU-NEXT: buffer_gl1_inv 18454; GFX10-CU-NEXT: buffer_gl0_inv 18455; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 18456; GFX10-CU-NEXT: s_endpgm 18457; 18458; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: 18459; SKIP-CACHE-INV: ; %bb.0: ; %entry 18460; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 18461; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 18462; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 18463; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 18464; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 18465; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 18466; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 18467; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 18468; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 18469; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 18470; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 18471; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 18472; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 18473; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 18474; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 18475; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18476; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 18477; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 18478; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 
18479; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18480; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18481; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 18482; SKIP-CACHE-INV-NEXT: s_endpgm 18483; 18484; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: 18485; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 18486; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18487; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18488; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18489; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18490; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18491; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18492; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18493; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18494; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18495; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18496; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18497; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 18498; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 18499; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18500; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 18501; 18502; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: 18503; GFX90A-TGSPLIT: ; %bb.0: ; %entry 18504; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18505; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18506; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18507; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18508; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18509; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18510; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18511; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18512; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18513; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18514; 
GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18515; GFX90A-TGSPLIT-NEXT: buffer_invl2 18516; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 18517; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18518; GFX90A-TGSPLIT-NEXT: s_endpgm 18519; 18520; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: 18521; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 18522; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18523; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18524; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18525; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18526; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18527; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18528; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18529; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18530; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18531; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 18532; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18533; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 18534; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18535; GFX940-NOTTGSPLIT-NEXT: s_endpgm 18536; 18537; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: 18538; GFX940-TGSPLIT: ; %bb.0: ; %entry 18539; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18540; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18541; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18542; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18543; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18544; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18545; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18546; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18547; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18548; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 18549; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18550; 
GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 18551; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18552; GFX940-TGSPLIT-NEXT: s_endpgm 18553; 18554; GFX11-WGP-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: 18555; GFX11-WGP: ; %bb.0: ; %entry 18556; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 18557; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18558; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18559; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18560; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 18561; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 18562; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 18563; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18564; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 18565; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18566; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 18567; GFX11-WGP-NEXT: buffer_gl1_inv 18568; GFX11-WGP-NEXT: buffer_gl0_inv 18569; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18570; GFX11-WGP-NEXT: s_endpgm 18571; 18572; GFX11-CU-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: 18573; GFX11-CU: ; %bb.0: ; %entry 18574; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 18575; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18576; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18577; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18578; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 18579; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 18580; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 18581; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18582; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 18583; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18584; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18585; GFX11-CU-NEXT: buffer_gl1_inv 18586; GFX11-CU-NEXT: buffer_gl0_inv 18587; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18588; GFX11-CU-NEXT: s_endpgm 18589; 18590; GFX12-WGP-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: 18591; GFX12-WGP: ; %bb.0: ; %entry 18592; 
GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 18593; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18594; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18595; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18596; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 18597; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 18598; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 18599; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18600; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 18601; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 18602; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 18603; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 18604; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18605; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 18606; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18607; GFX12-WGP-NEXT: s_endpgm 18608; 18609; GFX12-CU-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: 18610; GFX12-CU: ; %bb.0: ; %entry 18611; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 18612; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18613; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18614; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18615; GFX12-CU-NEXT: s_wait_kmcnt 0x0 18616; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 18617; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 18618; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18619; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 18620; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 18621; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 18622; GFX12-CU-NEXT: s_wait_samplecnt 0x0 18623; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18624; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 18625; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18626; GFX12-CU-NEXT: s_endpgm 18627 ptr addrspace(1) %out, i32 %in, i32 %old) { 18628entry: 18629 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 18630 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire 
18631 %val0 = extractvalue { i32, i1 } %val, 0 18632 store i32 %val0, ptr addrspace(1) %out, align 4 18633 ret void 18634} 18635 18636define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( 18637; GFX6-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 18638; GFX6: ; %bb.0: ; %entry 18639; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 18640; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18641; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 18642; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 18643; GFX6-NEXT: s_waitcnt lgkmcnt(0) 18644; GFX6-NEXT: s_mov_b32 s12, s5 18645; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 18646; GFX6-NEXT: s_mov_b32 s10, 0x100f000 18647; GFX6-NEXT: s_mov_b32 s11, -1 18648; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 18649; GFX6-NEXT: s_mov_b32 s5, s12 18650; GFX6-NEXT: s_mov_b32 s6, s11 18651; GFX6-NEXT: s_mov_b32 s7, s10 18652; GFX6-NEXT: v_mov_b32_e32 v0, s9 18653; GFX6-NEXT: v_mov_b32_e32 v2, s8 18654; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18655; GFX6-NEXT: v_mov_b32_e32 v1, v2 18656; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 18657; GFX6-NEXT: s_waitcnt vmcnt(0) 18658; GFX6-NEXT: buffer_wbinvl1 18659; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18660; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 18661; GFX6-NEXT: s_endpgm 18662; 18663; GFX7-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 18664; GFX7: ; %bb.0: ; %entry 18665; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 18666; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18667; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 18668; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 18669; GFX7-NEXT: s_mov_b64 s[12:13], 16 18670; GFX7-NEXT: s_waitcnt lgkmcnt(0) 18671; GFX7-NEXT: s_mov_b32 s6, s4 18672; GFX7-NEXT: s_mov_b32 s7, s5 18673; GFX7-NEXT: s_mov_b32 s11, s12 18674; GFX7-NEXT: s_mov_b32 s10, s13 18675; GFX7-NEXT: s_add_u32 s6, s6, s11 18676; 
GFX7-NEXT: s_addc_u32 s10, s7, s10 18677; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 18678; GFX7-NEXT: s_mov_b32 s7, s10 18679; GFX7-NEXT: v_mov_b32_e32 v2, s9 18680; GFX7-NEXT: v_mov_b32_e32 v0, s8 18681; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18682; GFX7-NEXT: v_mov_b32_e32 v3, v0 18683; GFX7-NEXT: v_mov_b32_e32 v0, s6 18684; GFX7-NEXT: v_mov_b32_e32 v1, s7 18685; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 18686; GFX7-NEXT: s_waitcnt vmcnt(0) 18687; GFX7-NEXT: buffer_wbinvl1_vol 18688; GFX7-NEXT: v_mov_b32_e32 v0, s4 18689; GFX7-NEXT: v_mov_b32_e32 v1, s5 18690; GFX7-NEXT: flat_store_dword v[0:1], v2 18691; GFX7-NEXT: s_endpgm 18692; 18693; GFX10-WGP-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 18694; GFX10-WGP: ; %bb.0: ; %entry 18695; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 18696; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18697; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 18698; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 18699; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 18700; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 18701; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 18702; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18703; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 18704; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18705; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18706; GFX10-WGP-NEXT: buffer_gl1_inv 18707; GFX10-WGP-NEXT: buffer_gl0_inv 18708; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 18709; GFX10-WGP-NEXT: s_endpgm 18710; 18711; GFX10-CU-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 18712; GFX10-CU: ; %bb.0: ; %entry 18713; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 18714; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18715; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 18716; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 18717; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 18718; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 18719; GFX10-CU-NEXT: 
v_mov_b32_e32 v3, s6 18720; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18721; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 18722; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18723; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18724; GFX10-CU-NEXT: buffer_gl1_inv 18725; GFX10-CU-NEXT: buffer_gl0_inv 18726; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 18727; GFX10-CU-NEXT: s_endpgm 18728; 18729; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 18730; SKIP-CACHE-INV: ; %bb.0: ; %entry 18731; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 18732; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 18733; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 18734; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 18735; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 18736; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 18737; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 18738; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 18739; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 18740; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 18741; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 18742; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 18743; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 18744; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 18745; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 18746; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18747; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 18748; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 18749; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18750; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18751; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18752; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 18753; SKIP-CACHE-INV-NEXT: s_endpgm 18754; 18755; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 18756; 
GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 18757; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18758; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18759; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18760; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18761; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18762; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18763; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18764; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18765; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18766; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18767; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18768; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 18769; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 18770; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18771; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 18772; 18773; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 18774; GFX90A-TGSPLIT: ; %bb.0: ; %entry 18775; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18776; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18777; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18778; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18779; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18780; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18781; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18782; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18783; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18784; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18785; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18786; GFX90A-TGSPLIT-NEXT: buffer_invl2 18787; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 18788; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18789; GFX90A-TGSPLIT-NEXT: s_endpgm 18790; 18791; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 18792; GFX940-NOTTGSPLIT: ; %bb.0: 
; %entry 18793; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18794; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18795; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18796; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18797; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18798; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18799; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18800; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18801; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18802; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 18803; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18804; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 18805; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18806; GFX940-NOTTGSPLIT-NEXT: s_endpgm 18807; 18808; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 18809; GFX940-TGSPLIT: ; %bb.0: ; %entry 18810; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18811; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18812; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18813; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18814; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18815; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18816; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18817; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18818; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18819; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 18820; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18821; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 18822; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18823; GFX940-TGSPLIT-NEXT: s_endpgm 18824; 18825; GFX11-WGP-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 18826; GFX11-WGP: ; %bb.0: ; %entry 18827; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 18828; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 
18829; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18830; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18831; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 18832; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 18833; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 18834; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18835; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 18836; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18837; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 18838; GFX11-WGP-NEXT: buffer_gl1_inv 18839; GFX11-WGP-NEXT: buffer_gl0_inv 18840; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18841; GFX11-WGP-NEXT: s_endpgm 18842; 18843; GFX11-CU-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 18844; GFX11-CU: ; %bb.0: ; %entry 18845; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 18846; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18847; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18848; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18849; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 18850; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 18851; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 18852; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18853; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 18854; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18855; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18856; GFX11-CU-NEXT: buffer_gl1_inv 18857; GFX11-CU-NEXT: buffer_gl0_inv 18858; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18859; GFX11-CU-NEXT: s_endpgm 18860; 18861; GFX12-WGP-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 18862; GFX12-WGP: ; %bb.0: ; %entry 18863; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 18864; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18865; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18866; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18867; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 18868; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 18869; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 18870; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed 
$vgpr1 def $vgpr1_vgpr2 killed $exec 18871; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 18872; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 18873; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18874; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 18875; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18876; GFX12-WGP-NEXT: s_endpgm 18877; 18878; GFX12-CU-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 18879; GFX12-CU: ; %bb.0: ; %entry 18880; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 18881; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18882; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18883; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18884; GFX12-CU-NEXT: s_wait_kmcnt 0x0 18885; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 18886; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 18887; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18888; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 18889; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 18890; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18891; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 18892; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18893; GFX12-CU-NEXT: s_endpgm 18894 ptr addrspace(1) %out, i32 %in, i32 %old) { 18895entry: 18896 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 18897 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire 18898 %val0 = extractvalue { i32, i1 } %val, 0 18899 store i32 %val0, ptr addrspace(1) %out, align 4 18900 ret void 18901} 18902 18903define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( 18904; GFX6-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 18905; GFX6: ; %bb.0: ; %entry 18906; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 18907; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18908; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 18909; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 18910; GFX6-NEXT: s_waitcnt 
lgkmcnt(0) 18911; GFX6-NEXT: s_mov_b32 s12, s5 18912; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 18913; GFX6-NEXT: s_mov_b32 s10, 0x100f000 18914; GFX6-NEXT: s_mov_b32 s11, -1 18915; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 18916; GFX6-NEXT: s_mov_b32 s5, s12 18917; GFX6-NEXT: s_mov_b32 s6, s11 18918; GFX6-NEXT: s_mov_b32 s7, s10 18919; GFX6-NEXT: v_mov_b32_e32 v0, s9 18920; GFX6-NEXT: v_mov_b32_e32 v2, s8 18921; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18922; GFX6-NEXT: v_mov_b32_e32 v1, v2 18923; GFX6-NEXT: s_waitcnt vmcnt(0) 18924; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 18925; GFX6-NEXT: s_waitcnt vmcnt(0) 18926; GFX6-NEXT: buffer_wbinvl1 18927; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18928; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 18929; GFX6-NEXT: s_endpgm 18930; 18931; GFX7-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 18932; GFX7: ; %bb.0: ; %entry 18933; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 18934; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18935; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 18936; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 18937; GFX7-NEXT: s_mov_b64 s[12:13], 16 18938; GFX7-NEXT: s_waitcnt lgkmcnt(0) 18939; GFX7-NEXT: s_mov_b32 s6, s4 18940; GFX7-NEXT: s_mov_b32 s7, s5 18941; GFX7-NEXT: s_mov_b32 s11, s12 18942; GFX7-NEXT: s_mov_b32 s10, s13 18943; GFX7-NEXT: s_add_u32 s6, s6, s11 18944; GFX7-NEXT: s_addc_u32 s10, s7, s10 18945; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 18946; GFX7-NEXT: s_mov_b32 s7, s10 18947; GFX7-NEXT: v_mov_b32_e32 v2, s9 18948; GFX7-NEXT: v_mov_b32_e32 v0, s8 18949; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18950; GFX7-NEXT: v_mov_b32_e32 v3, v0 18951; GFX7-NEXT: v_mov_b32_e32 v0, s6 18952; GFX7-NEXT: v_mov_b32_e32 v1, s7 18953; GFX7-NEXT: s_waitcnt vmcnt(0) 18954; GFX7-NEXT: flat_atomic_cmpswap v2, 
v[0:1], v[2:3] glc 18955; GFX7-NEXT: s_waitcnt vmcnt(0) 18956; GFX7-NEXT: buffer_wbinvl1_vol 18957; GFX7-NEXT: v_mov_b32_e32 v0, s4 18958; GFX7-NEXT: v_mov_b32_e32 v1, s5 18959; GFX7-NEXT: flat_store_dword v[0:1], v2 18960; GFX7-NEXT: s_endpgm 18961; 18962; GFX10-WGP-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 18963; GFX10-WGP: ; %bb.0: ; %entry 18964; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 18965; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18966; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 18967; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 18968; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 18969; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 18970; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 18971; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18972; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 18973; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18974; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 18975; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18976; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18977; GFX10-WGP-NEXT: buffer_gl1_inv 18978; GFX10-WGP-NEXT: buffer_gl0_inv 18979; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 18980; GFX10-WGP-NEXT: s_endpgm 18981; 18982; GFX10-CU-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 18983; GFX10-CU: ; %bb.0: ; %entry 18984; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 18985; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18986; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 18987; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 18988; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 18989; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 18990; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 18991; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18992; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 18993; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18994; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 18995; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18996; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18997; 
GFX10-CU-NEXT: buffer_gl1_inv 18998; GFX10-CU-NEXT: buffer_gl0_inv 18999; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 19000; GFX10-CU-NEXT: s_endpgm 19001; 19002; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 19003; SKIP-CACHE-INV: ; %bb.0: ; %entry 19004; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 19005; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 19006; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 19007; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 19008; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 19009; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 19010; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 19011; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 19012; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 19013; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 19014; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 19015; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 19016; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 19017; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 19018; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 19019; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 19020; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 19021; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19022; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 19023; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19024; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 19025; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19026; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 19027; SKIP-CACHE-INV-NEXT: s_endpgm 19028; 19029; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 19030; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 19031; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19032; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19033; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19034; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19035; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19036; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19037; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 19038; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19039; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19040; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 19041; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19042; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 19043; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19044; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 19045; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 19046; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 19047; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 19048; 19049; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 19050; GFX90A-TGSPLIT: ; %bb.0: ; %entry 19051; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19052; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19053; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19054; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19055; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19056; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19057; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 19058; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19059; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19060; GFX90A-TGSPLIT-NEXT: buffer_wbl2 19061; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19062; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 19063; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19064; GFX90A-TGSPLIT-NEXT: buffer_invl2 19065; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 19066; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 19067; GFX90A-TGSPLIT-NEXT: s_endpgm 19068; 19069; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 19070; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 19071; GFX940-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v0, 0 19072; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19073; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19074; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19075; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19076; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19077; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 19078; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19079; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19080; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 19081; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19082; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 19083; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19084; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 19085; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 19086; GFX940-NOTTGSPLIT-NEXT: s_endpgm 19087; 19088; GFX940-TGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 19089; GFX940-TGSPLIT: ; %bb.0: ; %entry 19090; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19091; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19092; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19093; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19094; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19095; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19096; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 19097; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19098; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19099; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 19100; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19101; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 19102; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19103; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 19104; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 19105; GFX940-TGSPLIT-NEXT: s_endpgm 19106; 19107; GFX11-WGP-LABEL: 
global_system_one_as_release_acquire_ret_cmpxchg: 19108; GFX11-WGP: ; %bb.0: ; %entry 19109; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 19110; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19111; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 19112; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 19113; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 19114; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 19115; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 19116; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19117; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 19118; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19119; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 19120; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 19121; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19122; GFX11-WGP-NEXT: buffer_gl1_inv 19123; GFX11-WGP-NEXT: buffer_gl0_inv 19124; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 19125; GFX11-WGP-NEXT: s_endpgm 19126; 19127; GFX11-CU-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 19128; GFX11-CU: ; %bb.0: ; %entry 19129; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 19130; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19131; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19132; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19133; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 19134; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 19135; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 19136; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19137; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 19138; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19139; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 19140; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 19141; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19142; GFX11-CU-NEXT: buffer_gl1_inv 19143; GFX11-CU-NEXT: buffer_gl0_inv 19144; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 19145; GFX11-CU-NEXT: s_endpgm 19146; 19147; GFX12-WGP-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 19148; GFX12-WGP: ; %bb.0: ; %entry 19149; 
GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 19150; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19151; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 19152; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 19153; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 19154; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 19155; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 19156; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19157; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 19158; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 19159; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19160; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19161; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19162; GFX12-WGP-NEXT: s_wait_storecnt 0x0 19163; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 19164; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19165; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19166; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19167; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 19168; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 19169; GFX12-WGP-NEXT: s_endpgm 19170; 19171; GFX12-CU-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 19172; GFX12-CU: ; %bb.0: ; %entry 19173; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 19174; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19175; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19176; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19177; GFX12-CU-NEXT: s_wait_kmcnt 0x0 19178; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 19179; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 19180; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19181; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 19182; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 19183; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 19184; GFX12-CU-NEXT: s_wait_samplecnt 0x0 19185; GFX12-CU-NEXT: s_wait_loadcnt 0x0 19186; GFX12-CU-NEXT: s_wait_storecnt 0x0 19187; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 19188; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 19189; 
; Kernel @global_system_one_as_acq_rel_acquire_ret_cmpxchg. Its `define` follows the closing `}`
; of the preceding kernel a little further along in this text.
; IR under test: a volatile cmpxchg on global memory (ptr addrspace(1)) at element 4 of %out
; (getelementptr i32 ... i32 4, which the expected output shows as a 16-byte offset). It compares
; against %old and writes %in, with syncscope("one-as"), success ordering acq_rel and failure
; ordering acquire. Field 0 of the { i32, i1 } result (the value read from memory) is then
; stored to %out.
; The expectation groups placed between the `define` line and the parameter list are produced by
; utils/update_llc_test_checks.py, one group per llc invocation listed at the top of the file.
; Regenerate them with that script instead of editing them by hand.
GFX12-CU-NEXT: s_wait_samplecnt 0x0 19190; GFX12-CU-NEXT: s_wait_loadcnt 0x0 19191; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 19192; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 19193; GFX12-CU-NEXT: s_endpgm 19194 ptr addrspace(1) %out, i32 %in, i32 %old) { 19195entry: 19196 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 19197 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release acquire 19198 %val0 = extractvalue { i32, i1 } %val, 0 19199 store i32 %val0, ptr addrspace(1) %out, align 4 19200 ret void 19201} 19202 19203define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( 19204; GFX6-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 19205; GFX6: ; %bb.0: ; %entry 19206; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 19207; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19208; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 19209; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 19210; GFX6-NEXT: s_waitcnt lgkmcnt(0) 19211; GFX6-NEXT: s_mov_b32 s12, s5 19212; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 19213; GFX6-NEXT: s_mov_b32 s10, 0x100f000 19214; GFX6-NEXT: s_mov_b32 s11, -1 19215; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 19216; GFX6-NEXT: s_mov_b32 s5, s12 19217; GFX6-NEXT: s_mov_b32 s6, s11 19218; GFX6-NEXT: s_mov_b32 s7, s10 19219; GFX6-NEXT: v_mov_b32_e32 v0, s9 19220; GFX6-NEXT: v_mov_b32_e32 v2, s8 19221; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 19222; GFX6-NEXT: v_mov_b32_e32 v1, v2 19223; GFX6-NEXT: s_waitcnt vmcnt(0) 19224; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 19225; GFX6-NEXT: s_waitcnt vmcnt(0) 19226; GFX6-NEXT: buffer_wbinvl1 19227; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 19228; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 19229; GFX6-NEXT: s_endpgm 19230; 19231; GFX7-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 19232; GFX7: ; 
%bb.0: ; %entry 19233; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 19234; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19235; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 19236; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 19237; GFX7-NEXT: s_mov_b64 s[12:13], 16 19238; GFX7-NEXT: s_waitcnt lgkmcnt(0) 19239; GFX7-NEXT: s_mov_b32 s6, s4 19240; GFX7-NEXT: s_mov_b32 s7, s5 19241; GFX7-NEXT: s_mov_b32 s11, s12 19242; GFX7-NEXT: s_mov_b32 s10, s13 19243; GFX7-NEXT: s_add_u32 s6, s6, s11 19244; GFX7-NEXT: s_addc_u32 s10, s7, s10 19245; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 19246; GFX7-NEXT: s_mov_b32 s7, s10 19247; GFX7-NEXT: v_mov_b32_e32 v2, s9 19248; GFX7-NEXT: v_mov_b32_e32 v0, s8 19249; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19250; GFX7-NEXT: v_mov_b32_e32 v3, v0 19251; GFX7-NEXT: v_mov_b32_e32 v0, s6 19252; GFX7-NEXT: v_mov_b32_e32 v1, s7 19253; GFX7-NEXT: s_waitcnt vmcnt(0) 19254; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 19255; GFX7-NEXT: s_waitcnt vmcnt(0) 19256; GFX7-NEXT: buffer_wbinvl1_vol 19257; GFX7-NEXT: v_mov_b32_e32 v0, s4 19258; GFX7-NEXT: v_mov_b32_e32 v1, s5 19259; GFX7-NEXT: flat_store_dword v[0:1], v2 19260; GFX7-NEXT: s_endpgm 19261; 19262; GFX10-WGP-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 19263; GFX10-WGP: ; %bb.0: ; %entry 19264; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 19265; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19266; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 19267; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 19268; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 19269; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 19270; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 19271; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19272; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 19273; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 19274; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 19275; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 19276; GFX10-WGP-NEXT: 
s_waitcnt vmcnt(0) 19277; GFX10-WGP-NEXT: buffer_gl1_inv 19278; GFX10-WGP-NEXT: buffer_gl0_inv 19279; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 19280; GFX10-WGP-NEXT: s_endpgm 19281; 19282; GFX10-CU-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 19283; GFX10-CU: ; %bb.0: ; %entry 19284; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 19285; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19286; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 19287; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 19288; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 19289; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 19290; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 19291; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19292; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 19293; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 19294; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 19295; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 19296; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 19297; GFX10-CU-NEXT: buffer_gl1_inv 19298; GFX10-CU-NEXT: buffer_gl0_inv 19299; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 19300; GFX10-CU-NEXT: s_endpgm 19301; 19302; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 19303; SKIP-CACHE-INV: ; %bb.0: ; %entry 19304; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 19305; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 19306; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 19307; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 19308; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 19309; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 19310; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 19311; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 19312; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 19313; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 19314; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 19315; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 19316; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 19317; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 19318; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 19319; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 19320; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 19321; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19322; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 19323; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19324; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 19325; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19326; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 19327; SKIP-CACHE-INV-NEXT: s_endpgm 19328; 19329; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 19330; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 19331; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19332; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19333; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19334; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19335; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19336; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19337; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 19338; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19339; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19340; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 19341; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19342; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 19343; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19344; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 19345; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 19346; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 19347; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 19348; 19349; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 19350; GFX90A-TGSPLIT: ; %bb.0: ; %entry 19351; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19352; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 
19353; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19354; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19355; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19356; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19357; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 19358; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19359; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19360; GFX90A-TGSPLIT-NEXT: buffer_wbl2 19361; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19362; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 19363; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19364; GFX90A-TGSPLIT-NEXT: buffer_invl2 19365; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 19366; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 19367; GFX90A-TGSPLIT-NEXT: s_endpgm 19368; 19369; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 19370; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 19371; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19372; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19373; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19374; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19375; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19376; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19377; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 19378; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19379; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19380; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 19381; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19382; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 19383; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19384; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 19385; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 19386; GFX940-NOTTGSPLIT-NEXT: s_endpgm 19387; 19388; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 19389; GFX940-TGSPLIT: ; 
%bb.0: ; %entry 19390; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19391; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19392; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19393; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19394; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19395; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19396; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 19397; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19398; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19399; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 19400; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19401; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 19402; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19403; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 19404; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 19405; GFX940-TGSPLIT-NEXT: s_endpgm 19406; 19407; GFX11-WGP-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 19408; GFX11-WGP: ; %bb.0: ; %entry 19409; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 19410; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19411; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 19412; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 19413; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 19414; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 19415; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 19416; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19417; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 19418; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19419; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 19420; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 19421; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19422; GFX11-WGP-NEXT: buffer_gl1_inv 19423; GFX11-WGP-NEXT: buffer_gl0_inv 19424; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 19425; GFX11-WGP-NEXT: s_endpgm 19426; 19427; GFX11-CU-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 19428; GFX11-CU: ; %bb.0: ; %entry 19429; 
GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 19430; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19431; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19432; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19433; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 19434; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 19435; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 19436; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19437; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 19438; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19439; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 19440; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 19441; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19442; GFX11-CU-NEXT: buffer_gl1_inv 19443; GFX11-CU-NEXT: buffer_gl0_inv 19444; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 19445; GFX11-CU-NEXT: s_endpgm 19446; 19447; GFX12-WGP-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 19448; GFX12-WGP: ; %bb.0: ; %entry 19449; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 19450; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19451; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 19452; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 19453; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 19454; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 19455; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 19456; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19457; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 19458; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 19459; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19460; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19461; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19462; GFX12-WGP-NEXT: s_wait_storecnt 0x0 19463; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 19464; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19465; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19466; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19467; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 19468; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 19469; GFX12-WGP-NEXT: s_endpgm 19470; 
19471; GFX12-CU-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 19472; GFX12-CU: ; %bb.0: ; %entry 19473; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 19474; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19475; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19476; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19477; GFX12-CU-NEXT: s_wait_kmcnt 0x0 19478; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 19479; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 19480; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19481; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 19482; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 19483; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 19484; GFX12-CU-NEXT: s_wait_samplecnt 0x0 19485; GFX12-CU-NEXT: s_wait_loadcnt 0x0 19486; GFX12-CU-NEXT: s_wait_storecnt 0x0 19487; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 19488; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 19489; GFX12-CU-NEXT: s_wait_samplecnt 0x0 19490; GFX12-CU-NEXT: s_wait_loadcnt 0x0 19491; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 19492; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 19493; GFX12-CU-NEXT: s_endpgm 19494 ptr addrspace(1) %out, i32 %in, i32 %old) { 19495entry: 19496 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 19497 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire 19498 %val0 = extractvalue { i32, i1 } %val, 0 19499 store i32 %val0, ptr addrspace(1) %out, align 4 19500 ret void 19501} 19502 19503define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( 19504; GFX6-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 19505; GFX6: ; %bb.0: ; %entry 19506; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 19507; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19508; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 19509; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 19510; GFX6-NEXT: s_waitcnt lgkmcnt(0) 19511; GFX6-NEXT: s_mov_b32 s12, s5 19512; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 
; Remainder of kernel @global_system_one_as_seq_cst_acquire_ret_cmpxchg. Its `define` line
; precedes this point. The kernel's IR body appears after the last expectation group below:
; a volatile cmpxchg on global memory (ptr addrspace(1)) at element 4 of %out (getelementptr
; i32 ... i32 4, shown in the expected output as a 16-byte offset), comparing against %old and
; writing %in, with syncscope("one-as"), success ordering seq_cst and failure ordering acquire.
; Field 0 of the { i32, i1 } result (the value read from memory) is then stored to %out.
; The text after that kernel's closing `}` begins the next kernel,
; @global_system_one_as_monotonic_seq_cst_ret_cmpxchg.
; Expectation lines are produced by utils/update_llc_test_checks.py. Regenerate them with that
; script instead of editing them by hand.
killed $sgpr4_sgpr5 19513; GFX6-NEXT: s_mov_b32 s10, 0x100f000 19514; GFX6-NEXT: s_mov_b32 s11, -1 19515; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 19516; GFX6-NEXT: s_mov_b32 s5, s12 19517; GFX6-NEXT: s_mov_b32 s6, s11 19518; GFX6-NEXT: s_mov_b32 s7, s10 19519; GFX6-NEXT: v_mov_b32_e32 v0, s9 19520; GFX6-NEXT: v_mov_b32_e32 v2, s8 19521; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 19522; GFX6-NEXT: v_mov_b32_e32 v1, v2 19523; GFX6-NEXT: s_waitcnt vmcnt(0) 19524; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 19525; GFX6-NEXT: s_waitcnt vmcnt(0) 19526; GFX6-NEXT: buffer_wbinvl1 19527; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 19528; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 19529; GFX6-NEXT: s_endpgm 19530; 19531; GFX7-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 19532; GFX7: ; %bb.0: ; %entry 19533; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 19534; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19535; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 19536; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 19537; GFX7-NEXT: s_mov_b64 s[12:13], 16 19538; GFX7-NEXT: s_waitcnt lgkmcnt(0) 19539; GFX7-NEXT: s_mov_b32 s6, s4 19540; GFX7-NEXT: s_mov_b32 s7, s5 19541; GFX7-NEXT: s_mov_b32 s11, s12 19542; GFX7-NEXT: s_mov_b32 s10, s13 19543; GFX7-NEXT: s_add_u32 s6, s6, s11 19544; GFX7-NEXT: s_addc_u32 s10, s7, s10 19545; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 19546; GFX7-NEXT: s_mov_b32 s7, s10 19547; GFX7-NEXT: v_mov_b32_e32 v2, s9 19548; GFX7-NEXT: v_mov_b32_e32 v0, s8 19549; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19550; GFX7-NEXT: v_mov_b32_e32 v3, v0 19551; GFX7-NEXT: v_mov_b32_e32 v0, s6 19552; GFX7-NEXT: v_mov_b32_e32 v1, s7 19553; GFX7-NEXT: s_waitcnt vmcnt(0) 19554; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 19555; GFX7-NEXT: s_waitcnt vmcnt(0) 19556; GFX7-NEXT: buffer_wbinvl1_vol 19557; 
GFX7-NEXT: v_mov_b32_e32 v0, s4 19558; GFX7-NEXT: v_mov_b32_e32 v1, s5 19559; GFX7-NEXT: flat_store_dword v[0:1], v2 19560; GFX7-NEXT: s_endpgm 19561; 19562; GFX10-WGP-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 19563; GFX10-WGP: ; %bb.0: ; %entry 19564; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 19565; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19566; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 19567; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 19568; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 19569; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 19570; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 19571; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19572; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 19573; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 19574; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 19575; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 19576; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 19577; GFX10-WGP-NEXT: buffer_gl1_inv 19578; GFX10-WGP-NEXT: buffer_gl0_inv 19579; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 19580; GFX10-WGP-NEXT: s_endpgm 19581; 19582; GFX10-CU-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 19583; GFX10-CU: ; %bb.0: ; %entry 19584; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 19585; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19586; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 19587; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 19588; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 19589; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 19590; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 19591; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19592; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 19593; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 19594; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 19595; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 19596; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 19597; GFX10-CU-NEXT: buffer_gl1_inv 19598; GFX10-CU-NEXT: buffer_gl0_inv 19599; GFX10-CU-NEXT: 
global_store_dword v0, v1, s[4:5] 19600; GFX10-CU-NEXT: s_endpgm 19601; 19602; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 19603; SKIP-CACHE-INV: ; %bb.0: ; %entry 19604; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 19605; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 19606; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 19607; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 19608; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 19609; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 19610; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 19611; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 19612; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 19613; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 19614; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 19615; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 19616; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 19617; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 19618; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 19619; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 19620; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 19621; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19622; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 19623; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19624; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 19625; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19626; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 19627; SKIP-CACHE-INV-NEXT: s_endpgm 19628; 19629; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 19630; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 19631; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19632; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19633; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19634; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19635; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt 
lgkmcnt(0) 19636; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19637; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 19638; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19639; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19640; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 19641; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19642; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 19643; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19644; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 19645; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 19646; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 19647; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 19648; 19649; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 19650; GFX90A-TGSPLIT: ; %bb.0: ; %entry 19651; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19652; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19653; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19654; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19655; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19656; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19657; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 19658; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19659; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19660; GFX90A-TGSPLIT-NEXT: buffer_wbl2 19661; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19662; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 19663; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19664; GFX90A-TGSPLIT-NEXT: buffer_invl2 19665; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 19666; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 19667; GFX90A-TGSPLIT-NEXT: s_endpgm 19668; 19669; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 19670; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 19671; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19672; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19673; 
GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19674; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19675; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19676; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19677; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 19678; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19679; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19680; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 19681; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19682; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 19683; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19684; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 19685; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 19686; GFX940-NOTTGSPLIT-NEXT: s_endpgm 19687; 19688; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 19689; GFX940-TGSPLIT: ; %bb.0: ; %entry 19690; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19691; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19692; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19693; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19694; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19695; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19696; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 19697; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19698; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19699; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 19700; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19701; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 19702; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19703; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 19704; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 19705; GFX940-TGSPLIT-NEXT: s_endpgm 19706; 19707; GFX11-WGP-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 19708; GFX11-WGP: ; %bb.0: ; %entry 19709; GFX11-WGP-NEXT: 
v_mov_b32_e32 v0, 0 19710; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19711; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 19712; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 19713; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 19714; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 19715; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 19716; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19717; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 19718; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19719; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 19720; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 19721; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19722; GFX11-WGP-NEXT: buffer_gl1_inv 19723; GFX11-WGP-NEXT: buffer_gl0_inv 19724; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 19725; GFX11-WGP-NEXT: s_endpgm 19726; 19727; GFX11-CU-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 19728; GFX11-CU: ; %bb.0: ; %entry 19729; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 19730; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19731; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19732; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19733; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 19734; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 19735; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 19736; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19737; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 19738; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19739; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 19740; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 19741; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19742; GFX11-CU-NEXT: buffer_gl1_inv 19743; GFX11-CU-NEXT: buffer_gl0_inv 19744; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 19745; GFX11-CU-NEXT: s_endpgm 19746; 19747; GFX12-WGP-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 19748; GFX12-WGP: ; %bb.0: ; %entry 19749; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 19750; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19751; GFX12-WGP-NEXT: 
s_load_b32 s3, s[4:5], 0x8 19752; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 19753; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 19754; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 19755; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 19756; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19757; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 19758; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 19759; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19760; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19761; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19762; GFX12-WGP-NEXT: s_wait_storecnt 0x0 19763; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 19764; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19765; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19766; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19767; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 19768; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 19769; GFX12-WGP-NEXT: s_endpgm 19770; 19771; GFX12-CU-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 19772; GFX12-CU: ; %bb.0: ; %entry 19773; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 19774; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19775; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19776; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19777; GFX12-CU-NEXT: s_wait_kmcnt 0x0 19778; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 19779; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 19780; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19781; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 19782; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 19783; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 19784; GFX12-CU-NEXT: s_wait_samplecnt 0x0 19785; GFX12-CU-NEXT: s_wait_loadcnt 0x0 19786; GFX12-CU-NEXT: s_wait_storecnt 0x0 19787; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 19788; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 19789; GFX12-CU-NEXT: s_wait_samplecnt 0x0 19790; GFX12-CU-NEXT: s_wait_loadcnt 0x0 19791; GFX12-CU-NEXT: global_inv 
scope:SCOPE_SYS 19792; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 19793; GFX12-CU-NEXT: s_endpgm 19794 ptr addrspace(1) %out, i32 %in, i32 %old) { 19795entry: 19796 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 19797 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire 19798 %val0 = extractvalue { i32, i1 } %val, 0 19799 store i32 %val0, ptr addrspace(1) %out, align 4 19800 ret void 19801} 19802 19803define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg( 19804; GFX6-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: 19805; GFX6: ; %bb.0: ; %entry 19806; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 19807; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19808; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 19809; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 19810; GFX6-NEXT: s_waitcnt lgkmcnt(0) 19811; GFX6-NEXT: s_mov_b32 s12, s5 19812; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 19813; GFX6-NEXT: s_mov_b32 s10, 0x100f000 19814; GFX6-NEXT: s_mov_b32 s11, -1 19815; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 19816; GFX6-NEXT: s_mov_b32 s5, s12 19817; GFX6-NEXT: s_mov_b32 s6, s11 19818; GFX6-NEXT: s_mov_b32 s7, s10 19819; GFX6-NEXT: v_mov_b32_e32 v0, s9 19820; GFX6-NEXT: v_mov_b32_e32 v2, s8 19821; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 19822; GFX6-NEXT: v_mov_b32_e32 v1, v2 19823; GFX6-NEXT: s_waitcnt vmcnt(0) 19824; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 19825; GFX6-NEXT: s_waitcnt vmcnt(0) 19826; GFX6-NEXT: buffer_wbinvl1 19827; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 19828; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 19829; GFX6-NEXT: s_endpgm 19830; 19831; GFX7-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: 19832; GFX7: ; %bb.0: ; %entry 19833; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 19834; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 
0x0 19835; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 19836; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 19837; GFX7-NEXT: s_mov_b64 s[12:13], 16 19838; GFX7-NEXT: s_waitcnt lgkmcnt(0) 19839; GFX7-NEXT: s_mov_b32 s6, s4 19840; GFX7-NEXT: s_mov_b32 s7, s5 19841; GFX7-NEXT: s_mov_b32 s11, s12 19842; GFX7-NEXT: s_mov_b32 s10, s13 19843; GFX7-NEXT: s_add_u32 s6, s6, s11 19844; GFX7-NEXT: s_addc_u32 s10, s7, s10 19845; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 19846; GFX7-NEXT: s_mov_b32 s7, s10 19847; GFX7-NEXT: v_mov_b32_e32 v2, s9 19848; GFX7-NEXT: v_mov_b32_e32 v0, s8 19849; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19850; GFX7-NEXT: v_mov_b32_e32 v3, v0 19851; GFX7-NEXT: v_mov_b32_e32 v0, s6 19852; GFX7-NEXT: v_mov_b32_e32 v1, s7 19853; GFX7-NEXT: s_waitcnt vmcnt(0) 19854; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 19855; GFX7-NEXT: s_waitcnt vmcnt(0) 19856; GFX7-NEXT: buffer_wbinvl1_vol 19857; GFX7-NEXT: v_mov_b32_e32 v0, s4 19858; GFX7-NEXT: v_mov_b32_e32 v1, s5 19859; GFX7-NEXT: flat_store_dword v[0:1], v2 19860; GFX7-NEXT: s_endpgm 19861; 19862; GFX10-WGP-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: 19863; GFX10-WGP: ; %bb.0: ; %entry 19864; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 19865; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19866; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 19867; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 19868; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 19869; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 19870; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 19871; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19872; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 19873; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 19874; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 19875; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 19876; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 19877; GFX10-WGP-NEXT: buffer_gl1_inv 19878; GFX10-WGP-NEXT: buffer_gl0_inv 19879; 
GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 19880; GFX10-WGP-NEXT: s_endpgm 19881; 19882; GFX10-CU-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: 19883; GFX10-CU: ; %bb.0: ; %entry 19884; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 19885; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19886; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 19887; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 19888; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 19889; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 19890; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 19891; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 19892; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 19893; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 19894; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 19895; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 19896; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 19897; GFX10-CU-NEXT: buffer_gl1_inv 19898; GFX10-CU-NEXT: buffer_gl0_inv 19899; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 19900; GFX10-CU-NEXT: s_endpgm 19901; 19902; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: 19903; SKIP-CACHE-INV: ; %bb.0: ; %entry 19904; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 19905; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 19906; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 19907; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 19908; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 19909; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 19910; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 19911; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 19912; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 19913; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 19914; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 19915; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 19916; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 19917; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 19918; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 19919; 
SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 19920; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 19921; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19922; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 19923; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19924; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 19925; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19926; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 19927; SKIP-CACHE-INV-NEXT: s_endpgm 19928; 19929; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: 19930; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 19931; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19932; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19933; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19934; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19935; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19936; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19937; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 19938; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19939; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19940; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 19941; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19942; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 19943; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19944; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 19945; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 19946; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 19947; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 19948; 19949; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: 19950; GFX90A-TGSPLIT: ; %bb.0: ; %entry 19951; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19952; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19953; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19954; GFX90A-TGSPLIT-NEXT: 
s_load_dword s6, s[8:9], 0xc 19955; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19956; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19957; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 19958; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19959; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19960; GFX90A-TGSPLIT-NEXT: buffer_wbl2 19961; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19962; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 19963; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19964; GFX90A-TGSPLIT-NEXT: buffer_invl2 19965; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 19966; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 19967; GFX90A-TGSPLIT-NEXT: s_endpgm 19968; 19969; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: 19970; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 19971; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19972; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19973; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19974; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19975; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19976; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19977; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 19978; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19979; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19980; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 19981; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19982; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 19983; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19984; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 19985; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 19986; GFX940-NOTTGSPLIT-NEXT: s_endpgm 19987; 19988; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: 19989; GFX940-TGSPLIT: ; %bb.0: ; %entry 19990; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 19991; 
GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19992; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19993; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19994; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19995; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19996; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 19997; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19998; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 19999; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 20000; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20001; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 20002; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20003; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 20004; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 20005; GFX940-TGSPLIT-NEXT: s_endpgm 20006; 20007; GFX11-WGP-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: 20008; GFX11-WGP: ; %bb.0: ; %entry 20009; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 20010; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20011; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20012; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20013; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 20014; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 20015; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 20016; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20017; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 20018; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20019; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20020; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 20021; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20022; GFX11-WGP-NEXT: buffer_gl1_inv 20023; GFX11-WGP-NEXT: buffer_gl0_inv 20024; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 20025; GFX11-WGP-NEXT: s_endpgm 20026; 20027; GFX11-CU-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: 20028; GFX11-CU: ; %bb.0: ; %entry 20029; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 20030; GFX11-CU-NEXT: s_load_b64 
s[0:1], s[4:5], 0x0 20031; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20032; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20033; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 20034; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 20035; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 20036; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20037; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 20038; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20039; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 20040; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 20041; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20042; GFX11-CU-NEXT: buffer_gl1_inv 20043; GFX11-CU-NEXT: buffer_gl0_inv 20044; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 20045; GFX11-CU-NEXT: s_endpgm 20046; 20047; GFX12-WGP-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: 20048; GFX12-WGP: ; %bb.0: ; %entry 20049; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 20050; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20051; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20052; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20053; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 20054; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 20055; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 20056; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20057; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 20058; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 20059; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20060; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20061; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20062; GFX12-WGP-NEXT: s_wait_storecnt 0x0 20063; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 20064; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20065; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20066; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20067; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 20068; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 20069; GFX12-WGP-NEXT: s_endpgm 20070; 20071; GFX12-CU-LABEL: 
global_system_one_as_monotonic_seq_cst_ret_cmpxchg: 20072; GFX12-CU: ; %bb.0: ; %entry 20073; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 20074; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20075; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20076; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20077; GFX12-CU-NEXT: s_wait_kmcnt 0x0 20078; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 20079; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 20080; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20081; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 20082; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 20083; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 20084; GFX12-CU-NEXT: s_wait_samplecnt 0x0 20085; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20086; GFX12-CU-NEXT: s_wait_storecnt 0x0 20087; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 20088; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 20089; GFX12-CU-NEXT: s_wait_samplecnt 0x0 20090; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20091; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 20092; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 20093; GFX12-CU-NEXT: s_endpgm 20094 ptr addrspace(1) %out, i32 %in, i32 %old) { 20095entry: 20096 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 20097 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst 20098 %val0 = extractvalue { i32, i1 } %val, 0 20099 store i32 %val0, ptr addrspace(1) %out, align 4 20100 ret void 20101} 20102 ; Returning volatile cmpxchg on global memory (addrspace 1) with syncscope one-as, success ordering acquire and failure ordering seq_cst. The kernel compare-and-swaps the i32 located 4 elements past %out (expected value %old, replacement %in) and stores the value it read back to %out. The per-target assertions that follow are autogenerated by utils/update_llc_test_checks.py, so regenerate them with that script instead of editing them by hand. 20103define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg( 20104; GFX6-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: 20105; GFX6: ; %bb.0: ; %entry 20106; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 20107; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20108; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 20109; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 20110; GFX6-NEXT: s_waitcnt lgkmcnt(0) 20111; GFX6-NEXT: s_mov_b32 s12, s5 20112; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed
$sgpr4_sgpr5 20113; GFX6-NEXT: s_mov_b32 s10, 0x100f000 20114; GFX6-NEXT: s_mov_b32 s11, -1 20115; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 20116; GFX6-NEXT: s_mov_b32 s5, s12 20117; GFX6-NEXT: s_mov_b32 s6, s11 20118; GFX6-NEXT: s_mov_b32 s7, s10 20119; GFX6-NEXT: v_mov_b32_e32 v0, s9 20120; GFX6-NEXT: v_mov_b32_e32 v2, s8 20121; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 20122; GFX6-NEXT: v_mov_b32_e32 v1, v2 20123; GFX6-NEXT: s_waitcnt vmcnt(0) 20124; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 20125; GFX6-NEXT: s_waitcnt vmcnt(0) 20126; GFX6-NEXT: buffer_wbinvl1 20127; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 20128; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 20129; GFX6-NEXT: s_endpgm 20130; 20131; GFX7-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: 20132; GFX7: ; %bb.0: ; %entry 20133; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 20134; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20135; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 20136; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 20137; GFX7-NEXT: s_mov_b64 s[12:13], 16 20138; GFX7-NEXT: s_waitcnt lgkmcnt(0) 20139; GFX7-NEXT: s_mov_b32 s6, s4 20140; GFX7-NEXT: s_mov_b32 s7, s5 20141; GFX7-NEXT: s_mov_b32 s11, s12 20142; GFX7-NEXT: s_mov_b32 s10, s13 20143; GFX7-NEXT: s_add_u32 s6, s6, s11 20144; GFX7-NEXT: s_addc_u32 s10, s7, s10 20145; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20146; GFX7-NEXT: s_mov_b32 s7, s10 20147; GFX7-NEXT: v_mov_b32_e32 v2, s9 20148; GFX7-NEXT: v_mov_b32_e32 v0, s8 20149; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20150; GFX7-NEXT: v_mov_b32_e32 v3, v0 20151; GFX7-NEXT: v_mov_b32_e32 v0, s6 20152; GFX7-NEXT: v_mov_b32_e32 v1, s7 20153; GFX7-NEXT: s_waitcnt vmcnt(0) 20154; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20155; GFX7-NEXT: s_waitcnt vmcnt(0) 20156; GFX7-NEXT: buffer_wbinvl1_vol 20157;
GFX7-NEXT: v_mov_b32_e32 v0, s4 20158; GFX7-NEXT: v_mov_b32_e32 v1, s5 20159; GFX7-NEXT: flat_store_dword v[0:1], v2 20160; GFX7-NEXT: s_endpgm 20161; 20162; GFX10-WGP-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: 20163; GFX10-WGP: ; %bb.0: ; %entry 20164; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 20165; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20166; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 20167; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 20168; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 20169; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 20170; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 20171; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20172; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 20173; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20174; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20175; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 20176; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20177; GFX10-WGP-NEXT: buffer_gl1_inv 20178; GFX10-WGP-NEXT: buffer_gl0_inv 20179; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 20180; GFX10-WGP-NEXT: s_endpgm 20181; 20182; GFX10-CU-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: 20183; GFX10-CU: ; %bb.0: ; %entry 20184; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 20185; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20186; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 20187; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 20188; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20189; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 20190; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 20191; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20192; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 20193; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20194; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 20195; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 20196; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20197; GFX10-CU-NEXT: buffer_gl1_inv 20198; GFX10-CU-NEXT: buffer_gl0_inv 20199; GFX10-CU-NEXT:
global_store_dword v0, v1, s[4:5] 20200; GFX10-CU-NEXT: s_endpgm 20201; 20202; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: 20203; SKIP-CACHE-INV: ; %bb.0: ; %entry 20204; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 20205; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 20206; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 20207; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 20208; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20209; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 20210; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 20211; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 20212; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 20213; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 20214; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 20215; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 20216; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 20217; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 20218; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 20219; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 20220; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 20221; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20222; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 20223; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20224; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 20225; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20226; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 20227; SKIP-CACHE-INV-NEXT: s_endpgm 20228; 20229; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: 20230; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 20231; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20232; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20233; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20234; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20235; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt
lgkmcnt(0) 20236; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20237; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 20238; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20239; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20240; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 20241; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20242; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 20243; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20244; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 20245; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 20246; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 20247; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 20248; 20249; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: 20250; GFX90A-TGSPLIT: ; %bb.0: ; %entry 20251; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20252; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20253; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20254; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20255; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20256; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20257; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 20258; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20259; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20260; GFX90A-TGSPLIT-NEXT: buffer_wbl2 20261; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20262; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 20263; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20264; GFX90A-TGSPLIT-NEXT: buffer_invl2 20265; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 20266; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 20267; GFX90A-TGSPLIT-NEXT: s_endpgm 20268; 20269; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: 20270; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 20271; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20272; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20273;
GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20274; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20275; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20276; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20277; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 20278; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20279; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20280; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 20281; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20282; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 20283; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20284; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 20285; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 20286; GFX940-NOTTGSPLIT-NEXT: s_endpgm 20287; 20288; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: 20289; GFX940-TGSPLIT: ; %bb.0: ; %entry 20290; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20291; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20292; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20293; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20294; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20295; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20296; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 20297; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20298; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20299; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 20300; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20301; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 20302; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20303; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 20304; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 20305; GFX940-TGSPLIT-NEXT: s_endpgm 20306; 20307; GFX11-WGP-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: 20308; GFX11-WGP: ; %bb.0: ; %entry 20309; GFX11-WGP-NEXT:
v_mov_b32_e32 v0, 0 20310; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20311; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20312; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20313; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 20314; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 20315; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 20316; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20317; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 20318; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20319; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20320; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 20321; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20322; GFX11-WGP-NEXT: buffer_gl1_inv 20323; GFX11-WGP-NEXT: buffer_gl0_inv 20324; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 20325; GFX11-WGP-NEXT: s_endpgm 20326; 20327; GFX11-CU-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: 20328; GFX11-CU: ; %bb.0: ; %entry 20329; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 20330; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20331; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20332; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20333; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 20334; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 20335; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 20336; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20337; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 20338; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20339; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 20340; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 20341; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20342; GFX11-CU-NEXT: buffer_gl1_inv 20343; GFX11-CU-NEXT: buffer_gl0_inv 20344; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 20345; GFX11-CU-NEXT: s_endpgm 20346; 20347; GFX12-WGP-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: 20348; GFX12-WGP: ; %bb.0: ; %entry 20349; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 20350; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20351; GFX12-WGP-NEXT:
s_load_b32 s3, s[4:5], 0x8 20352; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20353; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 20354; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 20355; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 20356; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20357; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 20358; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 20359; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20360; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20361; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20362; GFX12-WGP-NEXT: s_wait_storecnt 0x0 20363; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 20364; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20365; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 20366; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 20367; GFX12-WGP-NEXT: s_endpgm 20368; 20369; GFX12-CU-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: 20370; GFX12-CU: ; %bb.0: ; %entry 20371; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 20372; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20373; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20374; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20375; GFX12-CU-NEXT: s_wait_kmcnt 0x0 20376; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 20377; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 20378; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20379; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 20380; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 20381; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 20382; GFX12-CU-NEXT: s_wait_samplecnt 0x0 20383; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20384; GFX12-CU-NEXT: s_wait_storecnt 0x0 20385; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 20386; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20387; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 20388; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 20389; GFX12-CU-NEXT: s_endpgm 20390 ptr addrspace(1) %out, i32 %in, i32 %old) { 20391entry: 20392 %gep =
getelementptr i32, ptr addrspace(1) %out, i32 4 20393 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst 20394 %val0 = extractvalue { i32, i1 } %val, 0 20395 store i32 %val0, ptr addrspace(1) %out, align 4 20396 ret void 20397} 20398 20399define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg( 20400; GFX6-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: 20401; GFX6: ; %bb.0: ; %entry 20402; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 20403; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20404; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 20405; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 20406; GFX6-NEXT: s_waitcnt lgkmcnt(0) 20407; GFX6-NEXT: s_mov_b32 s12, s5 20408; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 20409; GFX6-NEXT: s_mov_b32 s10, 0x100f000 20410; GFX6-NEXT: s_mov_b32 s11, -1 20411; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 20412; GFX6-NEXT: s_mov_b32 s5, s12 20413; GFX6-NEXT: s_mov_b32 s6, s11 20414; GFX6-NEXT: s_mov_b32 s7, s10 20415; GFX6-NEXT: v_mov_b32_e32 v0, s9 20416; GFX6-NEXT: v_mov_b32_e32 v2, s8 20417; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 20418; GFX6-NEXT: v_mov_b32_e32 v1, v2 20419; GFX6-NEXT: s_waitcnt vmcnt(0) 20420; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 20421; GFX6-NEXT: s_waitcnt vmcnt(0) 20422; GFX6-NEXT: buffer_wbinvl1 20423; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 20424; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 20425; GFX6-NEXT: s_endpgm 20426; 20427; GFX7-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: 20428; GFX7: ; %bb.0: ; %entry 20429; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 20430; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20431; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 20432; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 20433; GFX7-NEXT: s_mov_b64 s[12:13], 16 20434; GFX7-NEXT: s_waitcnt lgkmcnt(0)
20435; GFX7-NEXT: s_mov_b32 s6, s4 20436; GFX7-NEXT: s_mov_b32 s7, s5 20437; GFX7-NEXT: s_mov_b32 s11, s12 20438; GFX7-NEXT: s_mov_b32 s10, s13 20439; GFX7-NEXT: s_add_u32 s6, s6, s11 20440; GFX7-NEXT: s_addc_u32 s10, s7, s10 20441; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20442; GFX7-NEXT: s_mov_b32 s7, s10 20443; GFX7-NEXT: v_mov_b32_e32 v2, s9 20444; GFX7-NEXT: v_mov_b32_e32 v0, s8 20445; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20446; GFX7-NEXT: v_mov_b32_e32 v3, v0 20447; GFX7-NEXT: v_mov_b32_e32 v0, s6 20448; GFX7-NEXT: v_mov_b32_e32 v1, s7 20449; GFX7-NEXT: s_waitcnt vmcnt(0) 20450; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20451; GFX7-NEXT: s_waitcnt vmcnt(0) 20452; GFX7-NEXT: buffer_wbinvl1_vol 20453; GFX7-NEXT: v_mov_b32_e32 v0, s4 20454; GFX7-NEXT: v_mov_b32_e32 v1, s5 20455; GFX7-NEXT: flat_store_dword v[0:1], v2 20456; GFX7-NEXT: s_endpgm 20457; 20458; GFX10-WGP-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: 20459; GFX10-WGP: ; %bb.0: ; %entry 20460; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 20461; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20462; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 20463; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 20464; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 20465; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 20466; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 20467; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20468; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 20469; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20470; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20471; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 20472; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20473; GFX10-WGP-NEXT: buffer_gl1_inv 20474; GFX10-WGP-NEXT: buffer_gl0_inv 20475; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 20476; GFX10-WGP-NEXT: s_endpgm 20477; 20478; GFX10-CU-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: 20479; GFX10-CU: ; %bb.0: 
; %entry 20480; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 20481; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20482; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 20483; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 20484; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20485; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 20486; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 20487; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20488; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 20489; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20490; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 20491; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 20492; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20493; GFX10-CU-NEXT: buffer_gl1_inv 20494; GFX10-CU-NEXT: buffer_gl0_inv 20495; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 20496; GFX10-CU-NEXT: s_endpgm 20497; 20498; SKIP-CACHE-INV-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: 20499; SKIP-CACHE-INV: ; %bb.0: ; %entry 20500; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 20501; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 20502; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 20503; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 20504; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20505; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 20506; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 20507; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 20508; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 20509; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 20510; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 20511; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 20512; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 20513; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 20514; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 20515; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 20516; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 20517; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20518; 
SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 20519; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20520; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 20521; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20522; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 20523; SKIP-CACHE-INV-NEXT: s_endpgm 20524; 20525; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: 20526; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 20527; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20528; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20529; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20530; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20531; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20532; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20533; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 20534; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20535; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20536; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 20537; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20538; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 20539; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20540; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 20541; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 20542; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 20543; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 20544; 20545; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: 20546; GFX90A-TGSPLIT: ; %bb.0: ; %entry 20547; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20548; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20549; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20550; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20551; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20552; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20553; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 20554; 
GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20555; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20556; GFX90A-TGSPLIT-NEXT: buffer_wbl2 20557; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20558; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 20559; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20560; GFX90A-TGSPLIT-NEXT: buffer_invl2 20561; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 20562; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 20563; GFX90A-TGSPLIT-NEXT: s_endpgm 20564; 20565; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: 20566; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 20567; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20568; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20569; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20570; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20571; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20572; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20573; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 20574; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20575; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20576; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 20577; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20578; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 20579; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20580; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 20581; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 20582; GFX940-NOTTGSPLIT-NEXT: s_endpgm 20583; 20584; GFX940-TGSPLIT-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: 20585; GFX940-TGSPLIT: ; %bb.0: ; %entry 20586; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 20587; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20588; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20589; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20590; 
GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20591; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20592; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 20593; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20594; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 20595; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 20596; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20597; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 20598; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20599; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 20600; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 20601; GFX940-TGSPLIT-NEXT: s_endpgm 20602; 20603; GFX11-WGP-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: 20604; GFX11-WGP: ; %bb.0: ; %entry 20605; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 20606; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20607; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20608; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20609; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 20610; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 20611; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 20612; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20613; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 20614; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20615; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20616; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 20617; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20618; GFX11-WGP-NEXT: buffer_gl1_inv 20619; GFX11-WGP-NEXT: buffer_gl0_inv 20620; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 20621; GFX11-WGP-NEXT: s_endpgm 20622; 20623; GFX11-CU-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: 20624; GFX11-CU: ; %bb.0: ; %entry 20625; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 20626; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20627; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20628; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20629; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 20630; GFX11-CU-NEXT: 
v_mov_b32_e32 v1, s3 20631; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 20632; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20633; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 20634; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20635; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 20636; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 20637; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20638; GFX11-CU-NEXT: buffer_gl1_inv 20639; GFX11-CU-NEXT: buffer_gl0_inv 20640; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 20641; GFX11-CU-NEXT: s_endpgm 20642; 20643; GFX12-WGP-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: 20644; GFX12-WGP: ; %bb.0: ; %entry 20645; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 20646; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20647; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20648; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20649; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 20650; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 20651; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 20652; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 20653; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 20654; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 20655; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20656; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20657; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20658; GFX12-WGP-NEXT: s_wait_storecnt 0x0 20659; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 20660; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20661; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20662; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20663; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 20664; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 20665; GFX12-WGP-NEXT: s_endpgm 20666; 20667; GFX12-CU-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: 20668; GFX12-CU: ; %bb.0: ; %entry 20669; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 20670; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20671; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(1) %out, align 4
  ret void
}

; Returning cmpxchg test: a volatile cmpxchg on the i32 located four i32
; elements past %out in addrspace(1) (the expected code addresses it with
; offset:16 or an explicit add of 16), using syncscope("one-as") with
; success ordering acq_rel and failure ordering seq_cst. The value loaded
; by the cmpxchg (field 0 of its result pair) is then stored to %out.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX6-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s12, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: s_mov_b32 s10, 0x100f000
; GFX6-NEXT: s_mov_b32 s11, -1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s12
; GFX6-NEXT: s_mov_b32 s6, s11
; GFX6-NEXT: s_mov_b32 s7, s10
; GFX6-NEXT: v_mov_b32_e32 v0, s9
; GFX6-NEXT: v_mov_b32_e32 v2, s8
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: v_mov_b32_e32 v1, v2
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT: s_mov_b64 s[12:13], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, s4
; GFX7-NEXT: s_mov_b32 s7, s5
; GFX7-NEXT: s_mov_b32 s11, s12
; GFX7-NEXT: s_mov_b32 s10, s13
; GFX7-NEXT: s_add_u32 s6, s6, s11
; GFX7-NEXT: s_addc_u32 s10, s7, s10
; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT: s_mov_b32 s7, s10
; GFX7-NEXT: v_mov_b32_e32 v2, s9
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6
; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6
; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(1) %out, align 4
  ret void
}

; Returning cmpxchg test: a volatile cmpxchg on the i32 located four i32
; elements past %out in addrspace(1) (the expected code addresses it with
; offset:16 or an explicit add of 16), using syncscope("one-as") with
; seq_cst as both the success and the failure ordering. The value loaded by
; the cmpxchg (field 0 of its result pair) is then stored to %out.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX6-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s12, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: s_mov_b32 s10, 0x100f000
; GFX6-NEXT: s_mov_b32 s11, -1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s12
; GFX6-NEXT: s_mov_b32 s6, s11
; GFX6-NEXT: s_mov_b32 s7, s10
; GFX6-NEXT: v_mov_b32_e32 v0, s9
; GFX6-NEXT: v_mov_b32_e32 v2, s8
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: v_mov_b32_e32 v1, v2
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT: s_mov_b64 s[12:13], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, s4
; GFX7-NEXT: s_mov_b32 s7, s5
; GFX7-NEXT: s_mov_b32 s11, s12
; GFX7-NEXT: s_mov_b32 s10, s13
; GFX7-NEXT: s_add_u32 s6, s6, s11
; GFX7-NEXT: s_addc_u32 s10, s7, s10
; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT: s_mov_b32 s7, s10
; GFX7-NEXT: v_mov_b32_e32 v2, s9
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6
; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6
; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(1) %out, align 4
  ret void
}