; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck --check-prefixes=GFX90A-NOTTGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck --check-prefixes=GFX90A-TGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck --check-prefixes=GFX940-NOTTGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck --check-prefixes=GFX940-TGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s

; Unoptimized (-O0) code generation for wavefront-scope atomic accesses to
; global memory (addrspace(1)), checked once per llc configuration listed
; above. The expectation blocks inside each kernel are emitted by
; utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.

; Wavefront-scope unordered atomic i32 load from %in; the loaded value is then
; written to %out with an ordinary store. In every expectation block the load
; has no cache-policy modifier and no cache-invalidate instruction follows it.
define amdgpu_kernel void @global_wavefront_unordered_load(
; GFX6-LABEL: global_wavefront_unordered_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s6, s9
; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
; GFX6-NEXT: s_mov_b32 s12, 0x100f000
; GFX6-NEXT: s_mov_b32 s13, -1
; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; GFX6-NEXT: s_mov_b32 s9, s6
; GFX6-NEXT: s_mov_b32 s10, s13
; GFX6-NEXT: s_mov_b32 s11, s12
; GFX6-NEXT: s_mov_b32 s14, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s14
; GFX6-NEXT: s_mov_b32 s6, s13
; GFX6-NEXT: s_mov_b32 s7, s12
; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_wavefront_unordered_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: flat_load_dword v2, v[0:1]
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_wavefront_unordered_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7]
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_wavefront_unordered_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7]
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_wavefront_unordered_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8
; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_unordered_load:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_wavefront_unordered_load:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_wavefront_unordered_load:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3]
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_wavefront_unordered_load:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3]
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: global_wavefront_unordered_load:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: global_wavefront_unordered_load:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: global_wavefront_unordered_load:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_wavefront_unordered_load:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
  %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") unordered, align 4
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Same kernel shape as above with monotonic ordering on the atomic load. The
; expectation blocks below show the same instruction sequences as for the
; unordered load.
define amdgpu_kernel void @global_wavefront_monotonic_load(
; GFX6-LABEL: global_wavefront_monotonic_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s6, s9
; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
; GFX6-NEXT: s_mov_b32 s12, 0x100f000
; GFX6-NEXT: s_mov_b32 s13, -1
; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; GFX6-NEXT: s_mov_b32 s9, s6
; GFX6-NEXT: s_mov_b32 s10, s13
; GFX6-NEXT: s_mov_b32 s11, s12
; GFX6-NEXT: s_mov_b32 s14, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s14
; GFX6-NEXT: s_mov_b32 s6, s13
; GFX6-NEXT: s_mov_b32 s7, s12
; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_wavefront_monotonic_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: flat_load_dword v2, v[0:1]
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_wavefront_monotonic_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7]
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_wavefront_monotonic_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7]
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8
; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_monotonic_load:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_wavefront_monotonic_load:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_load:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3]
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_load:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3]
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: global_wavefront_monotonic_load:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: global_wavefront_monotonic_load:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: global_wavefront_monotonic_load:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_wavefront_monotonic_load:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
  %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") monotonic, align 4
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Same kernel shape with acquire ordering on the atomic load. The expectation
; blocks below still show no cache-invalidate instruction after the load.
define amdgpu_kernel void @global_wavefront_acquire_load(
; GFX6-LABEL: global_wavefront_acquire_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s6, s9
; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
; GFX6-NEXT: s_mov_b32 s12, 0x100f000
; GFX6-NEXT: s_mov_b32 s13, -1
; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; GFX6-NEXT: s_mov_b32 s9, s6
; GFX6-NEXT: s_mov_b32 s10, s13
; GFX6-NEXT: s_mov_b32 s11, s12
; GFX6-NEXT: s_mov_b32 s14, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s14
; GFX6-NEXT: s_mov_b32 s6, s13
; GFX6-NEXT: s_mov_b32 s7, s12
; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_wavefront_acquire_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: flat_load_dword v2, v[0:1]
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_wavefront_acquire_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7]
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_wavefront_acquire_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7]
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8
; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acquire_load:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_wavefront_acquire_load:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_load:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3]
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_load:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3]
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: global_wavefront_acquire_load:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: global_wavefront_acquire_load:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: global_wavefront_acquire_load:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_wavefront_acquire_load:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
  %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") acquire, align 4
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Same kernel shape with seq_cst ordering on the atomic load. The expectation
; blocks below match those of the weaker orderings above.
define amdgpu_kernel void @global_wavefront_seq_cst_load(
; GFX6-LABEL: global_wavefront_seq_cst_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s6, s9
; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
; GFX6-NEXT: s_mov_b32 s12, 0x100f000
; GFX6-NEXT: s_mov_b32 s13, -1
; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; GFX6-NEXT: s_mov_b32 s9, s6
; GFX6-NEXT: s_mov_b32 s10, s13
; GFX6-NEXT: s_mov_b32 s11, s12
; GFX6-NEXT: s_mov_b32 s14, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s14
; GFX6-NEXT: s_mov_b32 s6, s13
; GFX6-NEXT: s_mov_b32 s7, s12
; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_wavefront_seq_cst_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: flat_load_dword v2, v[0:1]
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_wavefront_seq_cst_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7]
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_wavefront_seq_cst_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7]
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8
; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_load:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_wavefront_seq_cst_load:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7]
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_load:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3]
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_load:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3]
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: global_wavefront_seq_cst_load:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: global_wavefront_seq_cst_load:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: global_wavefront_seq_cst_load:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_wavefront_seq_cst_load:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
  %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") seq_cst, align 4
  store i32 %val, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @global_wavefront_unordered_store(
; GFX6-LABEL: global_wavefront_unordered_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9]
; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s11, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: s_mov_b32 s9, 0x100f000
; GFX6-NEXT: s_mov_b32 s10, -1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s11
; GFX6-NEXT: s_mov_b32 s6, s10
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: v_mov_b32_e32 v0, s8
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_wavefront_unordered_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_wavefront_unordered_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_wavefront_unordered_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_wavefront_unordered_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_unordered_store:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_wavefront_unordered_store:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_wavefront_unordered_store:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: 
s_endpgm 837; 838; GFX940-TGSPLIT-LABEL: global_wavefront_unordered_store: 839; GFX940-TGSPLIT: ; %bb.0: ; %entry 840; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 841; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 842; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 843; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 844; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 845; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 846; GFX940-TGSPLIT-NEXT: s_endpgm 847; 848; GFX11-WGP-LABEL: global_wavefront_unordered_store: 849; GFX11-WGP: ; %bb.0: ; %entry 850; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 851; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 852; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 853; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 854; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 855; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 856; GFX11-WGP-NEXT: s_endpgm 857; 858; GFX11-CU-LABEL: global_wavefront_unordered_store: 859; GFX11-CU: ; %bb.0: ; %entry 860; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 861; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 862; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 863; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 864; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 865; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 866; GFX11-CU-NEXT: s_endpgm 867; 868; GFX12-WGP-LABEL: global_wavefront_unordered_store: 869; GFX12-WGP: ; %bb.0: ; %entry 870; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 871; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 872; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 873; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 874; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 875; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 876; GFX12-WGP-NEXT: s_endpgm 877; 878; GFX12-CU-LABEL: global_wavefront_unordered_store: 879; GFX12-CU: ; %bb.0: ; %entry 880; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 881; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 882; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 883; GFX12-CU-NEXT: s_wait_kmcnt 0x0 884; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 885; GFX12-CU-NEXT: 
global_store_b32 v0, v1, s[0:1] 886; GFX12-CU-NEXT: s_endpgm 887 i32 %in, ptr addrspace(1) %out) { 888entry: 889 store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") unordered, align 4 890 ret void 891} 892 893define amdgpu_kernel void @global_wavefront_monotonic_store( 894; GFX6-LABEL: global_wavefront_monotonic_store: 895; GFX6: ; %bb.0: ; %entry 896; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 897; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 898; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 899; GFX6-NEXT: s_waitcnt lgkmcnt(0) 900; GFX6-NEXT: s_mov_b32 s11, s5 901; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 902; GFX6-NEXT: s_mov_b32 s9, 0x100f000 903; GFX6-NEXT: s_mov_b32 s10, -1 904; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 905; GFX6-NEXT: s_mov_b32 s5, s11 906; GFX6-NEXT: s_mov_b32 s6, s10 907; GFX6-NEXT: s_mov_b32 s7, s9 908; GFX6-NEXT: v_mov_b32_e32 v0, s8 909; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 910; GFX6-NEXT: s_endpgm 911; 912; GFX7-LABEL: global_wavefront_monotonic_store: 913; GFX7: ; %bb.0: ; %entry 914; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 915; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 916; GFX7-NEXT: s_waitcnt lgkmcnt(0) 917; GFX7-NEXT: v_mov_b32_e32 v0, s6 918; GFX7-NEXT: v_mov_b32_e32 v1, s7 919; GFX7-NEXT: v_mov_b32_e32 v2, s4 920; GFX7-NEXT: flat_store_dword v[0:1], v2 921; GFX7-NEXT: s_endpgm 922; 923; GFX10-WGP-LABEL: global_wavefront_monotonic_store: 924; GFX10-WGP: ; %bb.0: ; %entry 925; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 926; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 927; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 928; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 929; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 930; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 931; GFX10-WGP-NEXT: s_endpgm 932; 933; GFX10-CU-LABEL: global_wavefront_monotonic_store: 934; GFX10-CU: ; %bb.0: ; %entry 935; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 936; GFX10-CU-NEXT: s_load_dwordx2 
s[4:5], s[8:9], 0x8 937; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 938; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 939; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 940; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 941; GFX10-CU-NEXT: s_endpgm 942; 943; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_store: 944; SKIP-CACHE-INV: ; %bb.0: ; %entry 945; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 946; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 947; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 948; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 949; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 950; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 951; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 952; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 953; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 954; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 955; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 956; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 957; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 958; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 959; SKIP-CACHE-INV-NEXT: s_endpgm 960; 961; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_monotonic_store: 962; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 963; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 964; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 965; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 966; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 967; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 968; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 969; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 970; 971; GFX90A-TGSPLIT-LABEL: global_wavefront_monotonic_store: 972; GFX90A-TGSPLIT: ; %bb.0: ; %entry 973; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 974; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 975; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 976; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 977; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 978; GFX90A-TGSPLIT-NEXT: 
global_store_dword v0, v1, s[4:5] 979; GFX90A-TGSPLIT-NEXT: s_endpgm 980; 981; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_store: 982; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 983; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 984; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 985; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 986; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 987; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 988; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 989; GFX940-NOTTGSPLIT-NEXT: s_endpgm 990; 991; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_store: 992; GFX940-TGSPLIT: ; %bb.0: ; %entry 993; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 994; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 995; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 996; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 997; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 998; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 999; GFX940-TGSPLIT-NEXT: s_endpgm 1000; 1001; GFX11-WGP-LABEL: global_wavefront_monotonic_store: 1002; GFX11-WGP: ; %bb.0: ; %entry 1003; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1004; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1005; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1006; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1007; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1008; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 1009; GFX11-WGP-NEXT: s_endpgm 1010; 1011; GFX11-CU-LABEL: global_wavefront_monotonic_store: 1012; GFX11-CU: ; %bb.0: ; %entry 1013; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1014; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1015; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1016; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1017; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1018; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 1019; GFX11-CU-NEXT: s_endpgm 1020; 1021; GFX12-WGP-LABEL: global_wavefront_monotonic_store: 1022; GFX12-WGP: ; %bb.0: ; %entry 1023; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1024; 
GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1025; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1026; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1027; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1028; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 1029; GFX12-WGP-NEXT: s_endpgm 1030; 1031; GFX12-CU-LABEL: global_wavefront_monotonic_store: 1032; GFX12-CU: ; %bb.0: ; %entry 1033; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1034; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1035; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1036; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1037; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1038; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 1039; GFX12-CU-NEXT: s_endpgm 1040 i32 %in, ptr addrspace(1) %out) { 1041entry: 1042 store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") monotonic, align 4 1043 ret void 1044} 1045 1046define amdgpu_kernel void @global_wavefront_release_store( 1047; GFX6-LABEL: global_wavefront_release_store: 1048; GFX6: ; %bb.0: ; %entry 1049; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 1050; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 1051; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 1052; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1053; GFX6-NEXT: s_mov_b32 s11, s5 1054; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1055; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1056; GFX6-NEXT: s_mov_b32 s10, -1 1057; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1058; GFX6-NEXT: s_mov_b32 s5, s11 1059; GFX6-NEXT: s_mov_b32 s6, s10 1060; GFX6-NEXT: s_mov_b32 s7, s9 1061; GFX6-NEXT: v_mov_b32_e32 v0, s8 1062; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 1063; GFX6-NEXT: s_endpgm 1064; 1065; GFX7-LABEL: global_wavefront_release_store: 1066; GFX7: ; %bb.0: ; %entry 1067; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 1068; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 1069; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1070; GFX7-NEXT: v_mov_b32_e32 v0, s6 1071; GFX7-NEXT: v_mov_b32_e32 v1, s7 1072; GFX7-NEXT: v_mov_b32_e32 v2, s4 1073; GFX7-NEXT: flat_store_dword 
v[0:1], v2 1074; GFX7-NEXT: s_endpgm 1075; 1076; GFX10-WGP-LABEL: global_wavefront_release_store: 1077; GFX10-WGP: ; %bb.0: ; %entry 1078; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 1079; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1080; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1081; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1082; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1083; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 1084; GFX10-WGP-NEXT: s_endpgm 1085; 1086; GFX10-CU-LABEL: global_wavefront_release_store: 1087; GFX10-CU: ; %bb.0: ; %entry 1088; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 1089; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1090; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1091; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1092; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1093; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 1094; GFX10-CU-NEXT: s_endpgm 1095; 1096; SKIP-CACHE-INV-LABEL: global_wavefront_release_store: 1097; SKIP-CACHE-INV: ; %bb.0: ; %entry 1098; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 1099; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 1100; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 1101; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1102; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1103; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1104; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1105; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1106; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1107; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1108; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1109; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1110; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1111; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 1112; SKIP-CACHE-INV-NEXT: s_endpgm 1113; 1114; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_release_store: 1115; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1116; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1117; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], 
s[8:9], 0x8 1118; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1119; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1120; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1121; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1122; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1123; 1124; GFX90A-TGSPLIT-LABEL: global_wavefront_release_store: 1125; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1126; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1127; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1128; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1129; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1130; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1131; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1132; GFX90A-TGSPLIT-NEXT: s_endpgm 1133; 1134; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_store: 1135; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1136; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1137; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1138; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1139; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1140; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1141; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1142; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1143; 1144; GFX940-TGSPLIT-LABEL: global_wavefront_release_store: 1145; GFX940-TGSPLIT: ; %bb.0: ; %entry 1146; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1147; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1148; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1149; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1150; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1151; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1152; GFX940-TGSPLIT-NEXT: s_endpgm 1153; 1154; GFX11-WGP-LABEL: global_wavefront_release_store: 1155; GFX11-WGP: ; %bb.0: ; %entry 1156; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1157; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1158; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1159; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1160; 
GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1161; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 1162; GFX11-WGP-NEXT: s_endpgm 1163; 1164; GFX11-CU-LABEL: global_wavefront_release_store: 1165; GFX11-CU: ; %bb.0: ; %entry 1166; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1167; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1168; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1169; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1170; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1171; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 1172; GFX11-CU-NEXT: s_endpgm 1173; 1174; GFX12-WGP-LABEL: global_wavefront_release_store: 1175; GFX12-WGP: ; %bb.0: ; %entry 1176; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1177; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1178; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1179; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1180; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1181; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 1182; GFX12-WGP-NEXT: s_endpgm 1183; 1184; GFX12-CU-LABEL: global_wavefront_release_store: 1185; GFX12-CU: ; %bb.0: ; %entry 1186; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1187; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1188; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1189; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1190; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1191; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 1192; GFX12-CU-NEXT: s_endpgm 1193 i32 %in, ptr addrspace(1) %out) { 1194entry: 1195 store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") release, align 4 1196 ret void 1197} 1198 1199define amdgpu_kernel void @global_wavefront_seq_cst_store( 1200; GFX6-LABEL: global_wavefront_seq_cst_store: 1201; GFX6: ; %bb.0: ; %entry 1202; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 1203; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 1204; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 1205; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1206; GFX6-NEXT: s_mov_b32 s11, s5 1207; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1208; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1209; GFX6-NEXT: s_mov_b32 s10, -1 
1210; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1211; GFX6-NEXT: s_mov_b32 s5, s11 1212; GFX6-NEXT: s_mov_b32 s6, s10 1213; GFX6-NEXT: s_mov_b32 s7, s9 1214; GFX6-NEXT: v_mov_b32_e32 v0, s8 1215; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 1216; GFX6-NEXT: s_endpgm 1217; 1218; GFX7-LABEL: global_wavefront_seq_cst_store: 1219; GFX7: ; %bb.0: ; %entry 1220; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 1221; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 1222; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1223; GFX7-NEXT: v_mov_b32_e32 v0, s6 1224; GFX7-NEXT: v_mov_b32_e32 v1, s7 1225; GFX7-NEXT: v_mov_b32_e32 v2, s4 1226; GFX7-NEXT: flat_store_dword v[0:1], v2 1227; GFX7-NEXT: s_endpgm 1228; 1229; GFX10-WGP-LABEL: global_wavefront_seq_cst_store: 1230; GFX10-WGP: ; %bb.0: ; %entry 1231; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 1232; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1233; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1234; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1235; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1236; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 1237; GFX10-WGP-NEXT: s_endpgm 1238; 1239; GFX10-CU-LABEL: global_wavefront_seq_cst_store: 1240; GFX10-CU: ; %bb.0: ; %entry 1241; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 1242; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1243; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1244; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1245; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1246; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 1247; GFX10-CU-NEXT: s_endpgm 1248; 1249; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_store: 1250; SKIP-CACHE-INV: ; %bb.0: ; %entry 1251; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 1252; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 1253; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 1254; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1255; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1256; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1257; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1258; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1259; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1260; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1261; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1262; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1263; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1264; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 1265; SKIP-CACHE-INV-NEXT: s_endpgm 1266; 1267; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_store: 1268; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1269; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1270; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1271; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1272; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1273; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1274; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1275; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1276; 1277; GFX90A-TGSPLIT-LABEL: global_wavefront_seq_cst_store: 1278; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1279; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 1280; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 1281; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1282; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1283; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1284; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 1285; GFX90A-TGSPLIT-NEXT: s_endpgm 1286; 1287; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_store: 1288; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1289; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1290; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1291; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1292; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1293; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1294; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1295; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1296; 1297; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_store: 1298; GFX940-TGSPLIT: ; 
%bb.0: ; %entry 1299; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 1300; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1301; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1302; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1303; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1304; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 1305; GFX940-TGSPLIT-NEXT: s_endpgm 1306; 1307; GFX11-WGP-LABEL: global_wavefront_seq_cst_store: 1308; GFX11-WGP: ; %bb.0: ; %entry 1309; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1310; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1311; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1312; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1313; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1314; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 1315; GFX11-WGP-NEXT: s_endpgm 1316; 1317; GFX11-CU-LABEL: global_wavefront_seq_cst_store: 1318; GFX11-CU: ; %bb.0: ; %entry 1319; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1320; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1321; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1322; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1323; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1324; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 1325; GFX11-CU-NEXT: s_endpgm 1326; 1327; GFX12-WGP-LABEL: global_wavefront_seq_cst_store: 1328; GFX12-WGP: ; %bb.0: ; %entry 1329; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 1330; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1331; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1332; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1333; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1334; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 1335; GFX12-WGP-NEXT: s_endpgm 1336; 1337; GFX12-CU-LABEL: global_wavefront_seq_cst_store: 1338; GFX12-CU: ; %bb.0: ; %entry 1339; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 1340; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 1341; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1342; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1343; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1344; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 1345; GFX12-CU-NEXT: s_endpgm 
1346 i32 %in, ptr addrspace(1) %out) { 1347entry: 1348 store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") seq_cst, align 4 1349 ret void 1350} 1351 1352define amdgpu_kernel void @global_wavefront_monotonic_atomicrmw( 1353; GFX6-LABEL: global_wavefront_monotonic_atomicrmw: 1354; GFX6: ; %bb.0: ; %entry 1355; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1356; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 1357; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1358; GFX6-NEXT: s_mov_b32 s11, s5 1359; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1360; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1361; GFX6-NEXT: s_mov_b32 s10, -1 1362; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1363; GFX6-NEXT: s_mov_b32 s5, s11 1364; GFX6-NEXT: s_mov_b32 s6, s10 1365; GFX6-NEXT: s_mov_b32 s7, s9 1366; GFX6-NEXT: v_mov_b32_e32 v0, s8 1367; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 1368; GFX6-NEXT: s_endpgm 1369; 1370; GFX7-LABEL: global_wavefront_monotonic_atomicrmw: 1371; GFX7: ; %bb.0: ; %entry 1372; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1373; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1374; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1375; GFX7-NEXT: v_mov_b32_e32 v0, s6 1376; GFX7-NEXT: v_mov_b32_e32 v1, s7 1377; GFX7-NEXT: v_mov_b32_e32 v2, s4 1378; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1379; GFX7-NEXT: s_endpgm 1380; 1381; GFX10-WGP-LABEL: global_wavefront_monotonic_atomicrmw: 1382; GFX10-WGP: ; %bb.0: ; %entry 1383; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1384; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1385; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 1386; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1387; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1388; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 1389; GFX10-WGP-NEXT: s_endpgm 1390; 1391; GFX10-CU-LABEL: global_wavefront_monotonic_atomicrmw: 1392; GFX10-CU: ; %bb.0: ; %entry 1393; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1394; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1395; GFX10-CU-NEXT: 
s_load_dword s6, s[8:9], 0x8 1396; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1397; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1398; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 1399; GFX10-CU-NEXT: s_endpgm 1400; 1401; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_atomicrmw: 1402; SKIP-CACHE-INV: ; %bb.0: ; %entry 1403; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1404; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 1405; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1406; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1407; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1408; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1409; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1410; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1411; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1412; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1413; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1414; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1415; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 1416; SKIP-CACHE-INV-NEXT: s_endpgm 1417; 1418; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_monotonic_atomicrmw: 1419; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1420; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1421; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1422; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1423; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1424; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1425; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1426; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1427; 1428; GFX90A-TGSPLIT-LABEL: global_wavefront_monotonic_atomicrmw: 1429; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1430; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1431; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1432; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1433; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1434; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1435; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1436; 
GFX90A-TGSPLIT-NEXT: s_endpgm 1437; 1438; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_atomicrmw: 1439; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1440; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1441; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1442; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1443; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1444; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1445; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1446; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1447; 1448; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_atomicrmw: 1449; GFX940-TGSPLIT: ; %bb.0: ; %entry 1450; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1451; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1452; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1453; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1454; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1455; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1456; GFX940-TGSPLIT-NEXT: s_endpgm 1457; 1458; GFX11-WGP-LABEL: global_wavefront_monotonic_atomicrmw: 1459; GFX11-WGP: ; %bb.0: ; %entry 1460; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1461; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1462; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1463; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1464; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1465; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1466; GFX11-WGP-NEXT: s_endpgm 1467; 1468; GFX11-CU-LABEL: global_wavefront_monotonic_atomicrmw: 1469; GFX11-CU: ; %bb.0: ; %entry 1470; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1471; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1472; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1473; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1474; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1475; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1476; GFX11-CU-NEXT: s_endpgm 1477; 1478; GFX12-WGP-LABEL: global_wavefront_monotonic_atomicrmw: 1479; GFX12-WGP: ; %bb.0: ; %entry 1480; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1481; GFX12-WGP-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x0 1482; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1483; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1484; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1485; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1486; GFX12-WGP-NEXT: s_endpgm 1487; 1488; GFX12-CU-LABEL: global_wavefront_monotonic_atomicrmw: 1489; GFX12-CU: ; %bb.0: ; %entry 1490; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1491; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1492; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1493; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1494; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1495; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1496; GFX12-CU-NEXT: s_endpgm 1497 ptr addrspace(1) %out, i32 %in) { 1498entry: 1499 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") monotonic 1500 ret void 1501} 1502 1503define amdgpu_kernel void @global_wavefront_acquire_atomicrmw( 1504; GFX6-LABEL: global_wavefront_acquire_atomicrmw: 1505; GFX6: ; %bb.0: ; %entry 1506; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1507; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 1508; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1509; GFX6-NEXT: s_mov_b32 s11, s5 1510; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1511; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1512; GFX6-NEXT: s_mov_b32 s10, -1 1513; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1514; GFX6-NEXT: s_mov_b32 s5, s11 1515; GFX6-NEXT: s_mov_b32 s6, s10 1516; GFX6-NEXT: s_mov_b32 s7, s9 1517; GFX6-NEXT: v_mov_b32_e32 v0, s8 1518; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 1519; GFX6-NEXT: s_endpgm 1520; 1521; GFX7-LABEL: global_wavefront_acquire_atomicrmw: 1522; GFX7: ; %bb.0: ; %entry 1523; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1524; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1525; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1526; GFX7-NEXT: v_mov_b32_e32 v0, s6 1527; GFX7-NEXT: v_mov_b32_e32 v1, s7 1528; GFX7-NEXT: v_mov_b32_e32 v2, s4 1529; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1530; 
GFX7-NEXT: s_endpgm 1531; 1532; GFX10-WGP-LABEL: global_wavefront_acquire_atomicrmw: 1533; GFX10-WGP: ; %bb.0: ; %entry 1534; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1535; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1536; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 1537; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1538; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1539; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 1540; GFX10-WGP-NEXT: s_endpgm 1541; 1542; GFX10-CU-LABEL: global_wavefront_acquire_atomicrmw: 1543; GFX10-CU: ; %bb.0: ; %entry 1544; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1545; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1546; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 1547; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1548; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1549; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 1550; GFX10-CU-NEXT: s_endpgm 1551; 1552; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_atomicrmw: 1553; SKIP-CACHE-INV: ; %bb.0: ; %entry 1554; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1555; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 1556; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1557; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1558; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1559; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1560; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1561; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1562; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1563; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1564; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1565; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1566; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 1567; SKIP-CACHE-INV-NEXT: s_endpgm 1568; 1569; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acquire_atomicrmw: 1570; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1571; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1572; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1573; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, 
s[8:9], 0x8 1574; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1575; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1576; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1577; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1578; 1579; GFX90A-TGSPLIT-LABEL: global_wavefront_acquire_atomicrmw: 1580; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1581; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1582; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1583; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1584; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1585; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1586; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1587; GFX90A-TGSPLIT-NEXT: s_endpgm 1588; 1589; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_atomicrmw: 1590; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1591; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1592; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1593; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1594; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1595; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1596; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1597; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1598; 1599; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_atomicrmw: 1600; GFX940-TGSPLIT: ; %bb.0: ; %entry 1601; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1602; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1603; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1604; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1605; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1606; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1607; GFX940-TGSPLIT-NEXT: s_endpgm 1608; 1609; GFX11-WGP-LABEL: global_wavefront_acquire_atomicrmw: 1610; GFX11-WGP: ; %bb.0: ; %entry 1611; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1612; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1613; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1614; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1615; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1616; GFX11-WGP-NEXT: 
global_atomic_swap_b32 v0, v1, s[0:1] 1617; GFX11-WGP-NEXT: s_endpgm 1618; 1619; GFX11-CU-LABEL: global_wavefront_acquire_atomicrmw: 1620; GFX11-CU: ; %bb.0: ; %entry 1621; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1622; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1623; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1624; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1625; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1626; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1627; GFX11-CU-NEXT: s_endpgm 1628; 1629; GFX12-WGP-LABEL: global_wavefront_acquire_atomicrmw: 1630; GFX12-WGP: ; %bb.0: ; %entry 1631; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1632; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1633; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1634; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1635; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1636; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1637; GFX12-WGP-NEXT: s_endpgm 1638; 1639; GFX12-CU-LABEL: global_wavefront_acquire_atomicrmw: 1640; GFX12-CU: ; %bb.0: ; %entry 1641; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1642; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1643; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1644; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1645; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1646; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1647; GFX12-CU-NEXT: s_endpgm 1648 ptr addrspace(1) %out, i32 %in) { 1649entry: 1650 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acquire 1651 ret void 1652} 1653 1654define amdgpu_kernel void @global_wavefront_release_atomicrmw( 1655; GFX6-LABEL: global_wavefront_release_atomicrmw: 1656; GFX6: ; %bb.0: ; %entry 1657; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1658; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 1659; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1660; GFX6-NEXT: s_mov_b32 s11, s5 1661; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1662; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1663; GFX6-NEXT: s_mov_b32 s10, -1 1664; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 
def $sgpr4_sgpr5_sgpr6_sgpr7 1665; GFX6-NEXT: s_mov_b32 s5, s11 1666; GFX6-NEXT: s_mov_b32 s6, s10 1667; GFX6-NEXT: s_mov_b32 s7, s9 1668; GFX6-NEXT: v_mov_b32_e32 v0, s8 1669; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 1670; GFX6-NEXT: s_endpgm 1671; 1672; GFX7-LABEL: global_wavefront_release_atomicrmw: 1673; GFX7: ; %bb.0: ; %entry 1674; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1675; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1676; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1677; GFX7-NEXT: v_mov_b32_e32 v0, s6 1678; GFX7-NEXT: v_mov_b32_e32 v1, s7 1679; GFX7-NEXT: v_mov_b32_e32 v2, s4 1680; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1681; GFX7-NEXT: s_endpgm 1682; 1683; GFX10-WGP-LABEL: global_wavefront_release_atomicrmw: 1684; GFX10-WGP: ; %bb.0: ; %entry 1685; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1686; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1687; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 1688; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1689; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1690; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 1691; GFX10-WGP-NEXT: s_endpgm 1692; 1693; GFX10-CU-LABEL: global_wavefront_release_atomicrmw: 1694; GFX10-CU: ; %bb.0: ; %entry 1695; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1696; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1697; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 1698; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1699; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1700; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 1701; GFX10-CU-NEXT: s_endpgm 1702; 1703; SKIP-CACHE-INV-LABEL: global_wavefront_release_atomicrmw: 1704; SKIP-CACHE-INV: ; %bb.0: ; %entry 1705; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1706; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 1707; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1708; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1709; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1710; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1711; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1712; 
SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1713; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1714; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1715; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1716; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1717; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 1718; SKIP-CACHE-INV-NEXT: s_endpgm 1719; 1720; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_release_atomicrmw: 1721; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1722; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1723; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1724; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1725; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1726; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1727; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1728; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1729; 1730; GFX90A-TGSPLIT-LABEL: global_wavefront_release_atomicrmw: 1731; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1732; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1733; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1734; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1735; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1736; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1737; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1738; GFX90A-TGSPLIT-NEXT: s_endpgm 1739; 1740; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_atomicrmw: 1741; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1742; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1743; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1744; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1745; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1746; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1747; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1748; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1749; 1750; GFX940-TGSPLIT-LABEL: global_wavefront_release_atomicrmw: 1751; GFX940-TGSPLIT: ; %bb.0: ; %entry 1752; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1753; 
GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1754; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1755; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1756; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1757; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1758; GFX940-TGSPLIT-NEXT: s_endpgm 1759; 1760; GFX11-WGP-LABEL: global_wavefront_release_atomicrmw: 1761; GFX11-WGP: ; %bb.0: ; %entry 1762; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1763; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1764; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1765; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1766; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1767; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1768; GFX11-WGP-NEXT: s_endpgm 1769; 1770; GFX11-CU-LABEL: global_wavefront_release_atomicrmw: 1771; GFX11-CU: ; %bb.0: ; %entry 1772; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1773; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1774; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1775; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1776; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1777; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1778; GFX11-CU-NEXT: s_endpgm 1779; 1780; GFX12-WGP-LABEL: global_wavefront_release_atomicrmw: 1781; GFX12-WGP: ; %bb.0: ; %entry 1782; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1783; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1784; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1785; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1786; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1787; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1788; GFX12-WGP-NEXT: s_endpgm 1789; 1790; GFX12-CU-LABEL: global_wavefront_release_atomicrmw: 1791; GFX12-CU: ; %bb.0: ; %entry 1792; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1793; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1794; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1795; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1796; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1797; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1798; GFX12-CU-NEXT: s_endpgm 1799 ptr addrspace(1) %out, i32 %in) 
{ 1800entry: 1801 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") release 1802 ret void 1803} 1804 1805define amdgpu_kernel void @global_wavefront_acq_rel_atomicrmw( 1806; GFX6-LABEL: global_wavefront_acq_rel_atomicrmw: 1807; GFX6: ; %bb.0: ; %entry 1808; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1809; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 1810; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1811; GFX6-NEXT: s_mov_b32 s11, s5 1812; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1813; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1814; GFX6-NEXT: s_mov_b32 s10, -1 1815; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1816; GFX6-NEXT: s_mov_b32 s5, s11 1817; GFX6-NEXT: s_mov_b32 s6, s10 1818; GFX6-NEXT: s_mov_b32 s7, s9 1819; GFX6-NEXT: v_mov_b32_e32 v0, s8 1820; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 1821; GFX6-NEXT: s_endpgm 1822; 1823; GFX7-LABEL: global_wavefront_acq_rel_atomicrmw: 1824; GFX7: ; %bb.0: ; %entry 1825; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1826; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1827; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1828; GFX7-NEXT: v_mov_b32_e32 v0, s6 1829; GFX7-NEXT: v_mov_b32_e32 v1, s7 1830; GFX7-NEXT: v_mov_b32_e32 v2, s4 1831; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1832; GFX7-NEXT: s_endpgm 1833; 1834; GFX10-WGP-LABEL: global_wavefront_acq_rel_atomicrmw: 1835; GFX10-WGP: ; %bb.0: ; %entry 1836; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1837; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1838; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 1839; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1840; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1841; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 1842; GFX10-WGP-NEXT: s_endpgm 1843; 1844; GFX10-CU-LABEL: global_wavefront_acq_rel_atomicrmw: 1845; GFX10-CU: ; %bb.0: ; %entry 1846; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1847; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1848; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 1849; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1850; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 1851; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 1852; GFX10-CU-NEXT: s_endpgm 1853; 1854; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_atomicrmw: 1855; SKIP-CACHE-INV: ; %bb.0: ; %entry 1856; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1857; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 1858; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1859; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 1860; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 1861; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 1862; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1863; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 1864; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 1865; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 1866; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 1867; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 1868; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 1869; SKIP-CACHE-INV-NEXT: s_endpgm 1870; 1871; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_atomicrmw: 1872; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1873; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1874; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1875; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1876; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1877; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1878; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1879; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1880; 1881; GFX90A-TGSPLIT-LABEL: global_wavefront_acq_rel_atomicrmw: 1882; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1883; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1884; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1885; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 1886; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1887; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 1888; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 1889; GFX90A-TGSPLIT-NEXT: s_endpgm 1890; 1891; 
GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_atomicrmw: 1892; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1893; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1894; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1895; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1896; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1897; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1898; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1899; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1900; 1901; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_atomicrmw: 1902; GFX940-TGSPLIT: ; %bb.0: ; %entry 1903; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 1904; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1905; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 1906; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1907; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 1908; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 1909; GFX940-TGSPLIT-NEXT: s_endpgm 1910; 1911; GFX11-WGP-LABEL: global_wavefront_acq_rel_atomicrmw: 1912; GFX11-WGP: ; %bb.0: ; %entry 1913; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 1914; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1915; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1916; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1917; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 1918; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1919; GFX11-WGP-NEXT: s_endpgm 1920; 1921; GFX11-CU-LABEL: global_wavefront_acq_rel_atomicrmw: 1922; GFX11-CU: ; %bb.0: ; %entry 1923; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 1924; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1925; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1926; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1927; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 1928; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1929; GFX11-CU-NEXT: s_endpgm 1930; 1931; GFX12-WGP-LABEL: global_wavefront_acq_rel_atomicrmw: 1932; GFX12-WGP: ; %bb.0: ; %entry 1933; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 1934; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1935; 
GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 1936; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1937; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 1938; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1939; GFX12-WGP-NEXT: s_endpgm 1940; 1941; GFX12-CU-LABEL: global_wavefront_acq_rel_atomicrmw: 1942; GFX12-CU: ; %bb.0: ; %entry 1943; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 1944; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1945; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 1946; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1947; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 1948; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 1949; GFX12-CU-NEXT: s_endpgm 1950 ptr addrspace(1) %out, i32 %in) { 1951entry: 1952 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acq_rel 1953 ret void 1954} 1955 1956define amdgpu_kernel void @global_wavefront_seq_cst_atomicrmw( 1957; GFX6-LABEL: global_wavefront_seq_cst_atomicrmw: 1958; GFX6: ; %bb.0: ; %entry 1959; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1960; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 1961; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1962; GFX6-NEXT: s_mov_b32 s11, s5 1963; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 1964; GFX6-NEXT: s_mov_b32 s9, 0x100f000 1965; GFX6-NEXT: s_mov_b32 s10, -1 1966; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 1967; GFX6-NEXT: s_mov_b32 s5, s11 1968; GFX6-NEXT: s_mov_b32 s6, s10 1969; GFX6-NEXT: s_mov_b32 s7, s9 1970; GFX6-NEXT: v_mov_b32_e32 v0, s8 1971; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 1972; GFX6-NEXT: s_endpgm 1973; 1974; GFX7-LABEL: global_wavefront_seq_cst_atomicrmw: 1975; GFX7: ; %bb.0: ; %entry 1976; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1977; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1978; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1979; GFX7-NEXT: v_mov_b32_e32 v0, s6 1980; GFX7-NEXT: v_mov_b32_e32 v1, s7 1981; GFX7-NEXT: v_mov_b32_e32 v2, s4 1982; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1983; GFX7-NEXT: s_endpgm 1984; 1985; 
GFX10-WGP-LABEL: global_wavefront_seq_cst_atomicrmw: 1986; GFX10-WGP: ; %bb.0: ; %entry 1987; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1988; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1989; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 1990; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1991; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 1992; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 1993; GFX10-WGP-NEXT: s_endpgm 1994; 1995; GFX10-CU-LABEL: global_wavefront_seq_cst_atomicrmw: 1996; GFX10-CU: ; %bb.0: ; %entry 1997; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1998; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 1999; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2000; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2001; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2002; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 2003; GFX10-CU-NEXT: s_endpgm 2004; 2005; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_atomicrmw: 2006; SKIP-CACHE-INV: ; %bb.0: ; %entry 2007; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2008; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2009; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2010; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2011; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2012; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2013; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2014; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2015; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 2016; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2017; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2018; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 2019; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 2020; SKIP-CACHE-INV-NEXT: s_endpgm 2021; 2022; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_atomicrmw: 2023; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2024; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2025; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2026; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2027; 
GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2028; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2029; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 2030; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2031; 2032; GFX90A-TGSPLIT-LABEL: global_wavefront_seq_cst_atomicrmw: 2033; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2034; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2035; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2036; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2037; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2038; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2039; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 2040; GFX90A-TGSPLIT-NEXT: s_endpgm 2041; 2042; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_atomicrmw: 2043; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2044; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2045; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2046; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2047; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2048; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2049; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 2050; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2051; 2052; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_atomicrmw: 2053; GFX940-TGSPLIT: ; %bb.0: ; %entry 2054; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2055; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2056; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2057; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2058; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2059; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 2060; GFX940-TGSPLIT-NEXT: s_endpgm 2061; 2062; GFX11-WGP-LABEL: global_wavefront_seq_cst_atomicrmw: 2063; GFX11-WGP: ; %bb.0: ; %entry 2064; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2065; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2066; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2067; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2068; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2069; GFX11-WGP-NEXT: 
global_atomic_swap_b32 v0, v1, s[0:1] 2070; GFX11-WGP-NEXT: s_endpgm 2071; 2072; GFX11-CU-LABEL: global_wavefront_seq_cst_atomicrmw: 2073; GFX11-CU: ; %bb.0: ; %entry 2074; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2075; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2076; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2077; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2078; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 2079; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 2080; GFX11-CU-NEXT: s_endpgm 2081; 2082; GFX12-WGP-LABEL: global_wavefront_seq_cst_atomicrmw: 2083; GFX12-WGP: ; %bb.0: ; %entry 2084; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2085; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2086; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2087; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2088; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 2089; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 2090; GFX12-WGP-NEXT: s_endpgm 2091; 2092; GFX12-CU-LABEL: global_wavefront_seq_cst_atomicrmw: 2093; GFX12-CU: ; %bb.0: ; %entry 2094; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2095; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2096; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2097; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2098; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 2099; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 2100; GFX12-CU-NEXT: s_endpgm 2101 ptr addrspace(1) %out, i32 %in) { 2102entry: 2103 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") seq_cst 2104 ret void 2105} 2106 2107define amdgpu_kernel void @global_wavefront_acquire_ret_atomicrmw( 2108; GFX6-LABEL: global_wavefront_acquire_ret_atomicrmw: 2109; GFX6: ; %bb.0: ; %entry 2110; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2111; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 2112; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2113; GFX6-NEXT: s_mov_b32 s11, s5 2114; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2115; GFX6-NEXT: s_mov_b32 s9, 0x100f000 2116; GFX6-NEXT: s_mov_b32 s10, -1 2117; GFX6-NEXT: ; kill: def $sgpr4 killed 
$sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 2118; GFX6-NEXT: s_mov_b32 s5, s11 2119; GFX6-NEXT: s_mov_b32 s6, s10 2120; GFX6-NEXT: s_mov_b32 s7, s9 2121; GFX6-NEXT: v_mov_b32_e32 v0, s8 2122; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 2123; GFX6-NEXT: s_waitcnt vmcnt(0) 2124; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2125; GFX6-NEXT: s_endpgm 2126; 2127; GFX7-LABEL: global_wavefront_acquire_ret_atomicrmw: 2128; GFX7: ; %bb.0: ; %entry 2129; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2130; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 2131; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2132; GFX7-NEXT: v_mov_b32_e32 v0, s4 2133; GFX7-NEXT: v_mov_b32_e32 v1, s5 2134; GFX7-NEXT: v_mov_b32_e32 v2, s6 2135; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2136; GFX7-NEXT: v_mov_b32_e32 v0, s4 2137; GFX7-NEXT: v_mov_b32_e32 v1, s5 2138; GFX7-NEXT: s_waitcnt vmcnt(0) 2139; GFX7-NEXT: flat_store_dword v[0:1], v2 2140; GFX7-NEXT: s_endpgm 2141; 2142; GFX10-WGP-LABEL: global_wavefront_acquire_ret_atomicrmw: 2143; GFX10-WGP: ; %bb.0: ; %entry 2144; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2145; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2146; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2147; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2148; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 2149; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2150; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2151; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 2152; GFX10-WGP-NEXT: s_endpgm 2153; 2154; GFX10-CU-LABEL: global_wavefront_acquire_ret_atomicrmw: 2155; GFX10-CU: ; %bb.0: ; %entry 2156; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2157; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2158; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2159; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2160; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2161; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2162; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2163; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 2164; GFX10-CU-NEXT: s_endpgm 2165; 2166; 
SKIP-CACHE-INV-LABEL: global_wavefront_acquire_ret_atomicrmw: 2167; SKIP-CACHE-INV: ; %bb.0: ; %entry 2168; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2169; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2170; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2171; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2172; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2173; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2174; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2175; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2176; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 2177; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2178; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2179; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 2180; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 2181; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2182; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 2183; SKIP-CACHE-INV-NEXT: s_endpgm 2184; 2185; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acquire_ret_atomicrmw: 2186; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2187; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2188; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2189; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2190; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2191; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2192; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2193; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2194; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2195; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2196; 2197; GFX90A-TGSPLIT-LABEL: global_wavefront_acquire_ret_atomicrmw: 2198; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2199; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2200; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2201; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2202; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2203; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2204; GFX90A-TGSPLIT-NEXT: 
global_atomic_swap v1, v0, v1, s[4:5] glc 2205; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2206; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2207; GFX90A-TGSPLIT-NEXT: s_endpgm 2208; 2209; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_ret_atomicrmw: 2210; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2211; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2212; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2213; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2214; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2215; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2216; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 2217; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2218; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2219; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2220; 2221; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_ret_atomicrmw: 2222; GFX940-TGSPLIT: ; %bb.0: ; %entry 2223; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2224; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2225; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2226; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2227; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2228; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 2229; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2230; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2231; GFX940-TGSPLIT-NEXT: s_endpgm 2232; 2233; GFX11-WGP-LABEL: global_wavefront_acquire_ret_atomicrmw: 2234; GFX11-WGP: ; %bb.0: ; %entry 2235; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2236; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2237; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2238; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2239; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2240; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2241; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 2242; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2243; GFX11-WGP-NEXT: s_endpgm 2244; 2245; GFX11-CU-LABEL: 
global_wavefront_acquire_ret_atomicrmw: 2246; GFX11-CU: ; %bb.0: ; %entry 2247; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2248; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2249; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2250; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2251; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 2252; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2253; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 2254; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2255; GFX11-CU-NEXT: s_endpgm 2256; 2257; GFX12-WGP-LABEL: global_wavefront_acquire_ret_atomicrmw: 2258; GFX12-WGP: ; %bb.0: ; %entry 2259; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2260; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2261; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2262; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2263; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 2264; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN 2265; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 2266; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2267; GFX12-WGP-NEXT: s_endpgm 2268; 2269; GFX12-CU-LABEL: global_wavefront_acquire_ret_atomicrmw: 2270; GFX12-CU: ; %bb.0: ; %entry 2271; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2272; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2273; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2274; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2275; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 2276; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN 2277; GFX12-CU-NEXT: s_wait_loadcnt 0x0 2278; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2279; GFX12-CU-NEXT: s_endpgm 2280 ptr addrspace(1) %out, i32 %in) { 2281entry: 2282 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acquire 2283 store i32 %val, ptr addrspace(1) %out, align 4 2284 ret void 2285} 2286; Returning xchg on %out at wavefront scope with acq_rel ordering: every prefix expects the returning atomic swap, a single wait for its result before the store of %val, and no separate cache-maintenance instruction. 2287define amdgpu_kernel void @global_wavefront_acq_rel_ret_atomicrmw( 2288; GFX6-LABEL: global_wavefront_acq_rel_ret_atomicrmw: 2289; GFX6: ; %bb.0: ; %entry 2290; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2291; 
GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 2292; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2293; GFX6-NEXT: s_mov_b32 s11, s5 2294; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2295; GFX6-NEXT: s_mov_b32 s9, 0x100f000 2296; GFX6-NEXT: s_mov_b32 s10, -1 2297; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 2298; GFX6-NEXT: s_mov_b32 s5, s11 2299; GFX6-NEXT: s_mov_b32 s6, s10 2300; GFX6-NEXT: s_mov_b32 s7, s9 2301; GFX6-NEXT: v_mov_b32_e32 v0, s8 2302; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 2303; GFX6-NEXT: s_waitcnt vmcnt(0) 2304; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2305; GFX6-NEXT: s_endpgm 2306; 2307; GFX7-LABEL: global_wavefront_acq_rel_ret_atomicrmw: 2308; GFX7: ; %bb.0: ; %entry 2309; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2310; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 2311; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2312; GFX7-NEXT: v_mov_b32_e32 v0, s4 2313; GFX7-NEXT: v_mov_b32_e32 v1, s5 2314; GFX7-NEXT: v_mov_b32_e32 v2, s6 2315; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2316; GFX7-NEXT: v_mov_b32_e32 v0, s4 2317; GFX7-NEXT: v_mov_b32_e32 v1, s5 2318; GFX7-NEXT: s_waitcnt vmcnt(0) 2319; GFX7-NEXT: flat_store_dword v[0:1], v2 2320; GFX7-NEXT: s_endpgm 2321; 2322; GFX10-WGP-LABEL: global_wavefront_acq_rel_ret_atomicrmw: 2323; GFX10-WGP: ; %bb.0: ; %entry 2324; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2325; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2326; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2327; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2328; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 2329; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2330; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2331; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 2332; GFX10-WGP-NEXT: s_endpgm 2333; 2334; GFX10-CU-LABEL: global_wavefront_acq_rel_ret_atomicrmw: 2335; GFX10-CU: ; %bb.0: ; %entry 2336; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2337; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2338; GFX10-CU-NEXT: 
s_load_dword s6, s[8:9], 0x8 2339; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2340; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2341; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2342; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2343; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 2344; GFX10-CU-NEXT: s_endpgm 2345; 2346; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_ret_atomicrmw: 2347; SKIP-CACHE-INV: ; %bb.0: ; %entry 2348; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2349; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2350; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2351; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2352; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2353; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2354; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2355; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2356; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 2357; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2358; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2359; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 2360; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 2361; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2362; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 2363; SKIP-CACHE-INV-NEXT: s_endpgm 2364; 2365; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_ret_atomicrmw: 2366; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2367; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2368; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2369; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2370; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2371; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2372; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2373; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2374; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2375; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2376; 2377; GFX90A-TGSPLIT-LABEL: global_wavefront_acq_rel_ret_atomicrmw: 2378; GFX90A-TGSPLIT: ; 
%bb.0: ; %entry 2379; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2380; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2381; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2382; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2383; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2384; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2385; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2386; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2387; GFX90A-TGSPLIT-NEXT: s_endpgm 2388; 2389; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_ret_atomicrmw: 2390; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2391; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2392; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2393; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2394; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2395; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2396; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 2397; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2398; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2399; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2400; 2401; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_ret_atomicrmw: 2402; GFX940-TGSPLIT: ; %bb.0: ; %entry 2403; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2404; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2405; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2406; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2407; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2408; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 2409; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2410; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2411; GFX940-TGSPLIT-NEXT: s_endpgm 2412; 2413; GFX11-WGP-LABEL: global_wavefront_acq_rel_ret_atomicrmw: 2414; GFX11-WGP: ; %bb.0: ; %entry 2415; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2416; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2417; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2418; GFX11-WGP-NEXT: s_waitcnt 
lgkmcnt(0) 2419; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2420; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2421; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 2422; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2423; GFX11-WGP-NEXT: s_endpgm 2424; 2425; GFX11-CU-LABEL: global_wavefront_acq_rel_ret_atomicrmw: 2426; GFX11-CU: ; %bb.0: ; %entry 2427; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2428; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2429; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2430; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2431; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 2432; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2433; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 2434; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2435; GFX11-CU-NEXT: s_endpgm 2436; 2437; GFX12-WGP-LABEL: global_wavefront_acq_rel_ret_atomicrmw: 2438; GFX12-WGP: ; %bb.0: ; %entry 2439; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2440; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2441; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2442; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2443; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 2444; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN 2445; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 2446; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2447; GFX12-WGP-NEXT: s_endpgm 2448; 2449; GFX12-CU-LABEL: global_wavefront_acq_rel_ret_atomicrmw: 2450; GFX12-CU: ; %bb.0: ; %entry 2451; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2452; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2453; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2454; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2455; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 2456; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN 2457; GFX12-CU-NEXT: s_wait_loadcnt 0x0 2458; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2459; GFX12-CU-NEXT: s_endpgm 2460 ptr addrspace(1) %out, i32 %in) { 2461entry: 2462 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acq_rel 2463 store i32 
%val, ptr addrspace(1) %out, align 4 2464 ret void 2465} 2466 2467define amdgpu_kernel void @global_wavefront_seq_cst_ret_atomicrmw( 2468; GFX6-LABEL: global_wavefront_seq_cst_ret_atomicrmw: 2469; GFX6: ; %bb.0: ; %entry 2470; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2471; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 2472; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2473; GFX6-NEXT: s_mov_b32 s11, s5 2474; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2475; GFX6-NEXT: s_mov_b32 s9, 0x100f000 2476; GFX6-NEXT: s_mov_b32 s10, -1 2477; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 2478; GFX6-NEXT: s_mov_b32 s5, s11 2479; GFX6-NEXT: s_mov_b32 s6, s10 2480; GFX6-NEXT: s_mov_b32 s7, s9 2481; GFX6-NEXT: v_mov_b32_e32 v0, s8 2482; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 2483; GFX6-NEXT: s_waitcnt vmcnt(0) 2484; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2485; GFX6-NEXT: s_endpgm 2486; 2487; GFX7-LABEL: global_wavefront_seq_cst_ret_atomicrmw: 2488; GFX7: ; %bb.0: ; %entry 2489; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2490; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 2491; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2492; GFX7-NEXT: v_mov_b32_e32 v0, s4 2493; GFX7-NEXT: v_mov_b32_e32 v1, s5 2494; GFX7-NEXT: v_mov_b32_e32 v2, s6 2495; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2496; GFX7-NEXT: v_mov_b32_e32 v0, s4 2497; GFX7-NEXT: v_mov_b32_e32 v1, s5 2498; GFX7-NEXT: s_waitcnt vmcnt(0) 2499; GFX7-NEXT: flat_store_dword v[0:1], v2 2500; GFX7-NEXT: s_endpgm 2501; 2502; GFX10-WGP-LABEL: global_wavefront_seq_cst_ret_atomicrmw: 2503; GFX10-WGP: ; %bb.0: ; %entry 2504; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2505; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2506; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2507; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2508; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 2509; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2510; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2511; GFX10-WGP-NEXT: 
global_store_dword v0, v1, s[4:5] 2512; GFX10-WGP-NEXT: s_endpgm 2513; 2514; GFX10-CU-LABEL: global_wavefront_seq_cst_ret_atomicrmw: 2515; GFX10-CU: ; %bb.0: ; %entry 2516; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2517; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2518; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2519; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2520; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 2521; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2522; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2523; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 2524; GFX10-CU-NEXT: s_endpgm 2525; 2526; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_ret_atomicrmw: 2527; SKIP-CACHE-INV: ; %bb.0: ; %entry 2528; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2529; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 2530; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2531; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 2532; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2533; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 2534; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2535; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2536; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 2537; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 2538; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 2539; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 2540; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 2541; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2542; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 2543; SKIP-CACHE-INV-NEXT: s_endpgm 2544; 2545; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_ret_atomicrmw: 2546; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2547; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2548; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2549; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2550; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2551; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2552; GFX90A-NOTTGSPLIT-NEXT: 
global_atomic_swap v1, v0, v1, s[4:5] glc 2553; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2554; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2555; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2556; 2557; GFX90A-TGSPLIT-LABEL: global_wavefront_seq_cst_ret_atomicrmw: 2558; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2559; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2560; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2561; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2562; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2563; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2564; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 2565; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2566; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 2567; GFX90A-TGSPLIT-NEXT: s_endpgm 2568; 2569; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_ret_atomicrmw: 2570; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2571; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2572; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2573; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2574; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2575; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2576; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 2577; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 2578; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2579; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2580; 2581; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_ret_atomicrmw: 2582; GFX940-TGSPLIT: ; %bb.0: ; %entry 2583; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2584; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2585; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2586; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2587; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2588; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 2589; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2590; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 2591; GFX940-TGSPLIT-NEXT: 
s_endpgm 2592; 2593; GFX11-WGP-LABEL: global_wavefront_seq_cst_ret_atomicrmw: 2594; GFX11-WGP: ; %bb.0: ; %entry 2595; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2596; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2597; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2598; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2599; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 2600; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2601; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 2602; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2603; GFX11-WGP-NEXT: s_endpgm 2604; 2605; GFX11-CU-LABEL: global_wavefront_seq_cst_ret_atomicrmw: 2606; GFX11-CU: ; %bb.0: ; %entry 2607; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2608; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2609; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2610; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2611; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 2612; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 2613; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 2614; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2615; GFX11-CU-NEXT: s_endpgm 2616; 2617; GFX12-WGP-LABEL: global_wavefront_seq_cst_ret_atomicrmw: 2618; GFX12-WGP: ; %bb.0: ; %entry 2619; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2620; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2621; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2622; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2623; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 2624; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN 2625; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 2626; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 2627; GFX12-WGP-NEXT: s_endpgm 2628; 2629; GFX12-CU-LABEL: global_wavefront_seq_cst_ret_atomicrmw: 2630; GFX12-CU: ; %bb.0: ; %entry 2631; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2632; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2633; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2634; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2635; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 2636; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN 
2637; GFX12-CU-NEXT: s_wait_loadcnt 0x0 2638; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 2639; GFX12-CU-NEXT: s_endpgm 2640 ptr addrspace(1) %out, i32 %in) { 2641entry: 2642 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") seq_cst 2643 store i32 %val, ptr addrspace(1) %out, align 4 2644 ret void 2645} 2646; Non-returning cmpxchg through a getelementptr off %out at wavefront scope with monotonic success and failure orderings: every prefix expects the compare-swap to be immediately followed by s_endpgm, with no wait or cache-maintenance instruction after it. 2647define amdgpu_kernel void @global_wavefront_monotonic_monotonic_cmpxchg( 2648; GFX6-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: 2649; GFX6: ; %bb.0: ; %entry 2650; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 2651; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 2652; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 2653; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 2654; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2655; GFX6-NEXT: s_mov_b32 s12, s5 2656; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2657; GFX6-NEXT: s_mov_b32 s10, 0x100f000 2658; GFX6-NEXT: s_mov_b32 s11, -1 2659; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 2660; GFX6-NEXT: s_mov_b32 s5, s12 2661; GFX6-NEXT: s_mov_b32 s6, s11 2662; GFX6-NEXT: s_mov_b32 s7, s10 2663; GFX6-NEXT: v_mov_b32_e32 v0, s9 2664; GFX6-NEXT: v_mov_b32_e32 v2, s8 2665; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 2666; GFX6-NEXT: v_mov_b32_e32 v1, v2 2667; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 2668; GFX6-NEXT: s_endpgm 2669; 2670; GFX7-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: 2671; GFX7: ; %bb.0: ; %entry 2672; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 2673; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 2674; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 2675; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 2676; GFX7-NEXT: s_mov_b64 s[10:11], 16 2677; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2678; GFX7-NEXT: s_mov_b32 s4, s8 2679; GFX7-NEXT: s_mov_b32 s5, s9 2680; GFX7-NEXT: s_mov_b32 s9, s10 2681; GFX7-NEXT: s_mov_b32 s8, s11 2682; GFX7-NEXT: s_add_u32 s4, s4, s9 2683; GFX7-NEXT: s_addc_u32 s8, s5, s8 2684; GFX7-NEXT: ; 
kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 2685; GFX7-NEXT: s_mov_b32 s5, s8 2686; GFX7-NEXT: v_mov_b32_e32 v2, s7 2687; GFX7-NEXT: v_mov_b32_e32 v0, s6 2688; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2689; GFX7-NEXT: v_mov_b32_e32 v3, v0 2690; GFX7-NEXT: v_mov_b32_e32 v0, s4 2691; GFX7-NEXT: v_mov_b32_e32 v1, s5 2692; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 2693; GFX7-NEXT: s_endpgm 2694; 2695; GFX10-WGP-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: 2696; GFX10-WGP: ; %bb.0: ; %entry 2697; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2698; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2699; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 2700; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 2701; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2702; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 2703; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 2704; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 2705; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 2706; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 2707; GFX10-WGP-NEXT: s_endpgm 2708; 2709; GFX10-CU-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: 2710; GFX10-CU: ; %bb.0: ; %entry 2711; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2712; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2713; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 2714; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 2715; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2716; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 2717; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 2718; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 2719; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 2720; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 2721; GFX10-CU-NEXT: s_endpgm 2722; 2723; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: 2724; SKIP-CACHE-INV: ; %bb.0: ; %entry 2725; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 2726; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 
2727; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 2728; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 2729; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2730; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 2731; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2732; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 2733; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 2734; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2735; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 2736; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 2737; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 2738; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 2739; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 2740; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 2741; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 2742; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 2743; SKIP-CACHE-INV-NEXT: s_endpgm 2744; 2745; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: 2746; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2747; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2748; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2749; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 2750; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 2751; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2752; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 2753; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2754; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2755; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 2756; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 2757; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2758; 2759; GFX90A-TGSPLIT-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: 2760; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2761; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2762; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2763; GFX90A-TGSPLIT-NEXT: s_load_dword s7, 
s[8:9], 0x8 2764; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 2765; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2766; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 2767; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2768; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2769; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 2770; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 2771; GFX90A-TGSPLIT-NEXT: s_endpgm 2772; 2773; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: 2774; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2775; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2776; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2777; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 2778; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 2779; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2780; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 2781; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2782; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2783; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 2784; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 2785; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2786; 2787; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: 2788; GFX940-TGSPLIT: ; %bb.0: ; %entry 2789; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2790; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2791; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 2792; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 2793; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2794; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 2795; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2796; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2797; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 2798; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 2799; GFX940-TGSPLIT-NEXT: s_endpgm 2800; 2801; 
GFX11-WGP-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: 2802; GFX11-WGP: ; %bb.0: ; %entry 2803; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 2804; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2805; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 2806; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 2807; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2808; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 2809; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 2810; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 2811; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 2812; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 2813; GFX11-WGP-NEXT: s_endpgm 2814; 2815; GFX11-CU-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: 2816; GFX11-CU: ; %bb.0: ; %entry 2817; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 2818; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2819; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 2820; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 2821; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2822; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 2823; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 2824; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 2825; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 2826; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 2827; GFX11-CU-NEXT: s_endpgm 2828; 2829; GFX12-WGP-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: 2830; GFX12-WGP: ; %bb.0: ; %entry 2831; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 2832; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2833; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 2834; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 2835; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2836; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 2837; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 2838; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 2839; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 2840; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 2841; GFX12-WGP-NEXT: s_endpgm 2842; 2843; 
GFX12-CU-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: 2844; GFX12-CU: ; %bb.0: ; %entry 2845; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 2846; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2847; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 2848; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 2849; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2850; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 2851; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 2852; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 2853; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 2854; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 2855; GFX12-CU-NEXT: s_endpgm 2856 ptr addrspace(1) %out, i32 %in, i32 %old) { 2857entry: 2858 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 2859 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic 2860 ret void 2861} 2862 2863define amdgpu_kernel void @global_wavefront_acquire_monotonic_cmpxchg( 2864; GFX6-LABEL: global_wavefront_acquire_monotonic_cmpxchg: 2865; GFX6: ; %bb.0: ; %entry 2866; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 2867; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 2868; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 2869; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 2870; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2871; GFX6-NEXT: s_mov_b32 s12, s5 2872; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2873; GFX6-NEXT: s_mov_b32 s10, 0x100f000 2874; GFX6-NEXT: s_mov_b32 s11, -1 2875; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 2876; GFX6-NEXT: s_mov_b32 s5, s12 2877; GFX6-NEXT: s_mov_b32 s6, s11 2878; GFX6-NEXT: s_mov_b32 s7, s10 2879; GFX6-NEXT: v_mov_b32_e32 v0, s9 2880; GFX6-NEXT: v_mov_b32_e32 v2, s8 2881; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 2882; GFX6-NEXT: v_mov_b32_e32 v1, v2 2883; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 2884; GFX6-NEXT: s_endpgm 2885; 2886; GFX7-LABEL: 
global_wavefront_acquire_monotonic_cmpxchg: 2887; GFX7: ; %bb.0: ; %entry 2888; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 2889; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 2890; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 2891; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 2892; GFX7-NEXT: s_mov_b64 s[10:11], 16 2893; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2894; GFX7-NEXT: s_mov_b32 s4, s8 2895; GFX7-NEXT: s_mov_b32 s5, s9 2896; GFX7-NEXT: s_mov_b32 s9, s10 2897; GFX7-NEXT: s_mov_b32 s8, s11 2898; GFX7-NEXT: s_add_u32 s4, s4, s9 2899; GFX7-NEXT: s_addc_u32 s8, s5, s8 2900; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 2901; GFX7-NEXT: s_mov_b32 s5, s8 2902; GFX7-NEXT: v_mov_b32_e32 v2, s7 2903; GFX7-NEXT: v_mov_b32_e32 v0, s6 2904; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2905; GFX7-NEXT: v_mov_b32_e32 v3, v0 2906; GFX7-NEXT: v_mov_b32_e32 v0, s4 2907; GFX7-NEXT: v_mov_b32_e32 v1, s5 2908; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 2909; GFX7-NEXT: s_endpgm 2910; 2911; GFX10-WGP-LABEL: global_wavefront_acquire_monotonic_cmpxchg: 2912; GFX10-WGP: ; %bb.0: ; %entry 2913; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2914; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2915; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 2916; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 2917; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2918; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 2919; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 2920; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 2921; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 2922; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 2923; GFX10-WGP-NEXT: s_endpgm 2924; 2925; GFX10-CU-LABEL: global_wavefront_acquire_monotonic_cmpxchg: 2926; GFX10-CU: ; %bb.0: ; %entry 2927; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2928; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2929; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 2930; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 2931; GFX10-CU-NEXT: 
s_waitcnt lgkmcnt(0) 2932; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 2933; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 2934; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 2935; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 2936; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 2937; GFX10-CU-NEXT: s_endpgm 2938; 2939; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_monotonic_cmpxchg: 2940; SKIP-CACHE-INV: ; %bb.0: ; %entry 2941; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 2942; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 2943; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 2944; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 2945; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2946; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 2947; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 2948; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 2949; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 2950; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 2951; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 2952; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 2953; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 2954; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 2955; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 2956; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 2957; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 2958; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 2959; SKIP-CACHE-INV-NEXT: s_endpgm 2960; 2961; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acquire_monotonic_cmpxchg: 2962; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2963; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2964; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2965; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 2966; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 2967; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2968; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 2969; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2970; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2971; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 2972; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 2973; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2974; 2975; GFX90A-TGSPLIT-LABEL: global_wavefront_acquire_monotonic_cmpxchg: 2976; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2977; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2978; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2979; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 2980; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 2981; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2982; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 2983; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 2984; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2985; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 2986; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 2987; GFX90A-TGSPLIT-NEXT: s_endpgm 2988; 2989; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_monotonic_cmpxchg: 2990; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2991; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 2992; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2993; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 2994; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 2995; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2996; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 2997; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 2998; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2999; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3000; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3001; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3002; 3003; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_monotonic_cmpxchg: 3004; GFX940-TGSPLIT: ; %bb.0: ; %entry 3005; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3006; 
GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3007; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3008; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3009; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3010; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3011; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3012; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3013; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3014; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3015; GFX940-TGSPLIT-NEXT: s_endpgm 3016; 3017; GFX11-WGP-LABEL: global_wavefront_acquire_monotonic_cmpxchg: 3018; GFX11-WGP: ; %bb.0: ; %entry 3019; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 3020; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3021; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3022; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3023; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3024; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 3025; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 3026; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3027; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 3028; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3029; GFX11-WGP-NEXT: s_endpgm 3030; 3031; GFX11-CU-LABEL: global_wavefront_acquire_monotonic_cmpxchg: 3032; GFX11-CU: ; %bb.0: ; %entry 3033; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3034; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3035; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3036; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3037; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3038; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 3039; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 3040; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3041; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 3042; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3043; GFX11-CU-NEXT: s_endpgm 3044; 3045; GFX12-WGP-LABEL: global_wavefront_acquire_monotonic_cmpxchg: 3046; GFX12-WGP: ; %bb.0: ; %entry 3047; 
GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3048; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3049; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3050; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3051; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3052; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 3053; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 3054; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3055; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 3056; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3057; GFX12-WGP-NEXT: s_endpgm 3058; 3059; GFX12-CU-LABEL: global_wavefront_acquire_monotonic_cmpxchg: 3060; GFX12-CU: ; %bb.0: ; %entry 3061; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 3062; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3063; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3064; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3065; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3066; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 3067; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 3068; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3069; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 3070; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3071; GFX12-CU-NEXT: s_endpgm 3072 ptr addrspace(1) %out, i32 %in, i32 %old) { 3073entry: 3074 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 3075 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic 3076 ret void 3077} 3078 3079define amdgpu_kernel void @global_wavefront_release_monotonic_cmpxchg( 3080; GFX6-LABEL: global_wavefront_release_monotonic_cmpxchg: 3081; GFX6: ; %bb.0: ; %entry 3082; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3083; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3084; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3085; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3086; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3087; GFX6-NEXT: s_mov_b32 s12, s5 3088; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 3089; GFX6-NEXT: s_mov_b32 s10, 0x100f000 3090; 
GFX6-NEXT: s_mov_b32 s11, -1 3091; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 3092; GFX6-NEXT: s_mov_b32 s5, s12 3093; GFX6-NEXT: s_mov_b32 s6, s11 3094; GFX6-NEXT: s_mov_b32 s7, s10 3095; GFX6-NEXT: v_mov_b32_e32 v0, s9 3096; GFX6-NEXT: v_mov_b32_e32 v2, s8 3097; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3098; GFX6-NEXT: v_mov_b32_e32 v1, v2 3099; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 3100; GFX6-NEXT: s_endpgm 3101; 3102; GFX7-LABEL: global_wavefront_release_monotonic_cmpxchg: 3103; GFX7: ; %bb.0: ; %entry 3104; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3105; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3106; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3107; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3108; GFX7-NEXT: s_mov_b64 s[10:11], 16 3109; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3110; GFX7-NEXT: s_mov_b32 s4, s8 3111; GFX7-NEXT: s_mov_b32 s5, s9 3112; GFX7-NEXT: s_mov_b32 s9, s10 3113; GFX7-NEXT: s_mov_b32 s8, s11 3114; GFX7-NEXT: s_add_u32 s4, s4, s9 3115; GFX7-NEXT: s_addc_u32 s8, s5, s8 3116; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3117; GFX7-NEXT: s_mov_b32 s5, s8 3118; GFX7-NEXT: v_mov_b32_e32 v2, s7 3119; GFX7-NEXT: v_mov_b32_e32 v0, s6 3120; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3121; GFX7-NEXT: v_mov_b32_e32 v3, v0 3122; GFX7-NEXT: v_mov_b32_e32 v0, s4 3123; GFX7-NEXT: v_mov_b32_e32 v1, s5 3124; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3125; GFX7-NEXT: s_endpgm 3126; 3127; GFX10-WGP-LABEL: global_wavefront_release_monotonic_cmpxchg: 3128; GFX10-WGP: ; %bb.0: ; %entry 3129; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3130; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3131; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 3132; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 3133; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3134; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 3135; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 3136; GFX10-WGP-NEXT: ; kill: 
def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3137; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 3138; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3139; GFX10-WGP-NEXT: s_endpgm 3140; 3141; GFX10-CU-LABEL: global_wavefront_release_monotonic_cmpxchg: 3142; GFX10-CU: ; %bb.0: ; %entry 3143; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3144; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3145; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 3146; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 3147; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3148; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 3149; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 3150; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3151; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 3152; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3153; GFX10-CU-NEXT: s_endpgm 3154; 3155; SKIP-CACHE-INV-LABEL: global_wavefront_release_monotonic_cmpxchg: 3156; SKIP-CACHE-INV: ; %bb.0: ; %entry 3157; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 3158; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 3159; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 3160; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 3161; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3162; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 3163; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 3164; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 3165; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 3166; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 3167; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 3168; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 3169; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 3170; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 3171; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 3172; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3173; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 3174; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 
offset:16 3175; SKIP-CACHE-INV-NEXT: s_endpgm 3176; 3177; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_release_monotonic_cmpxchg: 3178; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3179; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3180; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3181; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3182; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3183; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3184; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3185; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3186; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3187; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3188; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3189; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3190; 3191; GFX90A-TGSPLIT-LABEL: global_wavefront_release_monotonic_cmpxchg: 3192; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3193; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3194; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3195; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3196; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3197; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3198; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3199; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3200; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3201; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3202; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3203; GFX90A-TGSPLIT-NEXT: s_endpgm 3204; 3205; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_monotonic_cmpxchg: 3206; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3207; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3208; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3209; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3210; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3211; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3212; GFX940-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v2, s3 3213; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3214; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3215; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3216; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3217; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3218; 3219; GFX940-TGSPLIT-LABEL: global_wavefront_release_monotonic_cmpxchg: 3220; GFX940-TGSPLIT: ; %bb.0: ; %entry 3221; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3222; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3223; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3224; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3225; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3226; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3227; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3228; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3229; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3230; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3231; GFX940-TGSPLIT-NEXT: s_endpgm 3232; 3233; GFX11-WGP-LABEL: global_wavefront_release_monotonic_cmpxchg: 3234; GFX11-WGP: ; %bb.0: ; %entry 3235; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 3236; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3237; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3238; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3239; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3240; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 3241; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 3242; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3243; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 3244; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3245; GFX11-WGP-NEXT: s_endpgm 3246; 3247; GFX11-CU-LABEL: global_wavefront_release_monotonic_cmpxchg: 3248; GFX11-CU: ; %bb.0: ; %entry 3249; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3250; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3251; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3252; 
GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3253; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3254; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 3255; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 3256; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3257; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 3258; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3259; GFX11-CU-NEXT: s_endpgm 3260; 3261; GFX12-WGP-LABEL: global_wavefront_release_monotonic_cmpxchg: 3262; GFX12-WGP: ; %bb.0: ; %entry 3263; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3264; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3265; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3266; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3267; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3268; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 3269; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 3270; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3271; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 3272; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3273; GFX12-WGP-NEXT: s_endpgm 3274; 3275; GFX12-CU-LABEL: global_wavefront_release_monotonic_cmpxchg: 3276; GFX12-CU: ; %bb.0: ; %entry 3277; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 3278; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3279; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3280; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3281; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3282; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 3283; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 3284; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3285; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 3286; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3287; GFX12-CU-NEXT: s_endpgm 3288 ptr addrspace(1) %out, i32 %in, i32 %old) { ; %out is the base pointer, %in the replacement value, %old the expected value 3289entry: 3290 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 ; i32 element 4 is byte offset 16, which the checks show as offset:16 (the flat-addressed checks add 16 with s_add_u32/s_addc_u32 instead) 3291 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic ; wavefront scope, success ordering release, failure ordering monotonic; every check block for this kernel expects the compare-swap to be followed directly by s_endpgm 3292 ret void 3293} 3294 3295define amdgpu_kernel void 
@global_wavefront_acq_rel_monotonic_cmpxchg( 3296; GFX6-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: 3297; GFX6: ; %bb.0: ; %entry 3298; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3299; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3300; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3301; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3302; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3303; GFX6-NEXT: s_mov_b32 s12, s5 3304; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 3305; GFX6-NEXT: s_mov_b32 s10, 0x100f000 3306; GFX6-NEXT: s_mov_b32 s11, -1 3307; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 3308; GFX6-NEXT: s_mov_b32 s5, s12 3309; GFX6-NEXT: s_mov_b32 s6, s11 3310; GFX6-NEXT: s_mov_b32 s7, s10 3311; GFX6-NEXT: v_mov_b32_e32 v0, s9 3312; GFX6-NEXT: v_mov_b32_e32 v2, s8 3313; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3314; GFX6-NEXT: v_mov_b32_e32 v1, v2 3315; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 3316; GFX6-NEXT: s_endpgm 3317; 3318; GFX7-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: 3319; GFX7: ; %bb.0: ; %entry 3320; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3321; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3322; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3323; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3324; GFX7-NEXT: s_mov_b64 s[10:11], 16 3325; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3326; GFX7-NEXT: s_mov_b32 s4, s8 3327; GFX7-NEXT: s_mov_b32 s5, s9 3328; GFX7-NEXT: s_mov_b32 s9, s10 3329; GFX7-NEXT: s_mov_b32 s8, s11 3330; GFX7-NEXT: s_add_u32 s4, s4, s9 3331; GFX7-NEXT: s_addc_u32 s8, s5, s8 3332; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3333; GFX7-NEXT: s_mov_b32 s5, s8 3334; GFX7-NEXT: v_mov_b32_e32 v2, s7 3335; GFX7-NEXT: v_mov_b32_e32 v0, s6 3336; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3337; GFX7-NEXT: v_mov_b32_e32 v3, v0 3338; GFX7-NEXT: v_mov_b32_e32 v0, s4 3339; GFX7-NEXT: v_mov_b32_e32 v1, s5 3340; GFX7-NEXT: 
flat_atomic_cmpswap v[0:1], v[2:3] 3341; GFX7-NEXT: s_endpgm 3342; 3343; GFX10-WGP-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: 3344; GFX10-WGP: ; %bb.0: ; %entry 3345; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3346; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3347; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 3348; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 3349; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3350; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 3351; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 3352; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3353; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 3354; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3355; GFX10-WGP-NEXT: s_endpgm 3356; 3357; GFX10-CU-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: 3358; GFX10-CU: ; %bb.0: ; %entry 3359; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3360; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3361; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 3362; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 3363; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3364; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 3365; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 3366; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3367; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 3368; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3369; GFX10-CU-NEXT: s_endpgm 3370; 3371; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: 3372; SKIP-CACHE-INV: ; %bb.0: ; %entry 3373; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 3374; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 3375; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 3376; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 3377; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3378; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 3379; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 3380; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 3381; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 
3382; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 3383; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 3384; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 3385; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 3386; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 3387; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 3388; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3389; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 3390; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 3391; SKIP-CACHE-INV-NEXT: s_endpgm 3392; 3393; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: 3394; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3395; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3396; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3397; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3398; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3399; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3400; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3401; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3402; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3403; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3404; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3405; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3406; 3407; GFX90A-TGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: 3408; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3409; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3410; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3411; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3412; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3413; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3414; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3415; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3416; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3417; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3418; GFX90A-TGSPLIT-NEXT: 
global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3419; GFX90A-TGSPLIT-NEXT: s_endpgm 3420; 3421; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: 3422; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3423; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3424; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3425; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3426; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3427; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3428; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3429; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3430; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3431; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3432; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3433; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3434; 3435; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: 3436; GFX940-TGSPLIT: ; %bb.0: ; %entry 3437; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3438; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3439; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3440; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3441; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3442; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3443; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3444; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3445; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3446; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3447; GFX940-TGSPLIT-NEXT: s_endpgm 3448; 3449; GFX11-WGP-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: 3450; GFX11-WGP: ; %bb.0: ; %entry 3451; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 3452; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3453; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3454; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3455; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3456; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 3457; 
GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 3458; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3459; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 3460; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3461; GFX11-WGP-NEXT: s_endpgm 3462; 3463; GFX11-CU-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: 3464; GFX11-CU: ; %bb.0: ; %entry 3465; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3466; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3467; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3468; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3469; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3470; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 3471; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 3472; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3473; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 3474; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3475; GFX11-CU-NEXT: s_endpgm 3476; 3477; GFX12-WGP-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: 3478; GFX12-WGP: ; %bb.0: ; %entry 3479; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3480; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3481; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3482; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3483; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3484; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 3485; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 3486; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3487; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 3488; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3489; GFX12-WGP-NEXT: s_endpgm 3490; 3491; GFX12-CU-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: 3492; GFX12-CU: ; %bb.0: ; %entry 3493; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 3494; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3495; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3496; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3497; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3498; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 3499; GFX12-CU-NEXT: v_mov_b32_e32 
v3, s2 3500; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3501; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 3502; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3503; GFX12-CU-NEXT: s_endpgm 3504 ptr addrspace(1) %out, i32 %in, i32 %old) { ; %out is the base pointer, %in the replacement value, %old the expected value 3505entry: 3506 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 ; i32 element 4 is byte offset 16, which the checks show as offset:16 (the flat-addressed checks add 16 with s_add_u32/s_addc_u32 instead) 3507 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic ; wavefront scope, success ordering acq_rel, failure ordering monotonic; every check block for this kernel expects the compare-swap to be followed directly by s_endpgm 3508 ret void 3509} 3510 3511define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_cmpxchg( 3512; GFX6-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: 3513; GFX6: ; %bb.0: ; %entry 3514; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3515; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3516; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3517; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3518; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3519; GFX6-NEXT: s_mov_b32 s12, s5 3520; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 3521; GFX6-NEXT: s_mov_b32 s10, 0x100f000 3522; GFX6-NEXT: s_mov_b32 s11, -1 3523; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 3524; GFX6-NEXT: s_mov_b32 s5, s12 3525; GFX6-NEXT: s_mov_b32 s6, s11 3526; GFX6-NEXT: s_mov_b32 s7, s10 3527; GFX6-NEXT: v_mov_b32_e32 v0, s9 3528; GFX6-NEXT: v_mov_b32_e32 v2, s8 3529; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3530; GFX6-NEXT: v_mov_b32_e32 v1, v2 3531; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 3532; GFX6-NEXT: s_endpgm 3533; 3534; GFX7-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: 3535; GFX7: ; %bb.0: ; %entry 3536; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3537; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3538; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3539; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3540; GFX7-NEXT: s_mov_b64 s[10:11], 16 3541; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3542; GFX7-NEXT: s_mov_b32 s4, s8 3543; GFX7-NEXT: s_mov_b32 s5, s9 3544; GFX7-NEXT: 
s_mov_b32 s9, s10 3545; GFX7-NEXT: s_mov_b32 s8, s11 3546; GFX7-NEXT: s_add_u32 s4, s4, s9 3547; GFX7-NEXT: s_addc_u32 s8, s5, s8 3548; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3549; GFX7-NEXT: s_mov_b32 s5, s8 3550; GFX7-NEXT: v_mov_b32_e32 v2, s7 3551; GFX7-NEXT: v_mov_b32_e32 v0, s6 3552; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3553; GFX7-NEXT: v_mov_b32_e32 v3, v0 3554; GFX7-NEXT: v_mov_b32_e32 v0, s4 3555; GFX7-NEXT: v_mov_b32_e32 v1, s5 3556; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3557; GFX7-NEXT: s_endpgm 3558; 3559; GFX10-WGP-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: 3560; GFX10-WGP: ; %bb.0: ; %entry 3561; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3562; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3563; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 3564; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 3565; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3566; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 3567; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 3568; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3569; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 3570; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3571; GFX10-WGP-NEXT: s_endpgm 3572; 3573; GFX10-CU-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: 3574; GFX10-CU: ; %bb.0: ; %entry 3575; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3576; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3577; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 3578; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 3579; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3580; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 3581; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 3582; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3583; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 3584; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3585; GFX10-CU-NEXT: s_endpgm 3586; 3587; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: 3588; 
SKIP-CACHE-INV: ; %bb.0: ; %entry 3589; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 3590; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 3591; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 3592; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 3593; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3594; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 3595; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 3596; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 3597; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 3598; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 3599; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 3600; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 3601; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 3602; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 3603; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 3604; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3605; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 3606; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 3607; SKIP-CACHE-INV-NEXT: s_endpgm 3608; 3609; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: 3610; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3611; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3612; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3613; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3614; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3615; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3616; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3617; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3618; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3619; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3620; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3621; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3622; 3623; GFX90A-TGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: 3624; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3625; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3626; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3627; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3628; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3629; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3630; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3631; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3632; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3633; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3634; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3635; GFX90A-TGSPLIT-NEXT: s_endpgm 3636; 3637; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: 3638; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3639; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3640; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3641; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3642; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3643; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3644; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3645; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3646; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3647; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3648; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3649; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3650; 3651; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: 3652; GFX940-TGSPLIT: ; %bb.0: ; %entry 3653; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3654; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3655; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3656; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3657; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3658; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3659; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3660; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3661; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 
v3, v1 3662; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3663; GFX940-TGSPLIT-NEXT: s_endpgm 3664; 3665; GFX11-WGP-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: 3666; GFX11-WGP: ; %bb.0: ; %entry 3667; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 3668; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3669; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3670; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3671; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3672; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 3673; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 3674; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3675; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 3676; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3677; GFX11-WGP-NEXT: s_endpgm 3678; 3679; GFX11-CU-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: 3680; GFX11-CU: ; %bb.0: ; %entry 3681; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3682; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3683; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3684; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3685; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3686; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 3687; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 3688; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3689; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 3690; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3691; GFX11-CU-NEXT: s_endpgm 3692; 3693; GFX12-WGP-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: 3694; GFX12-WGP: ; %bb.0: ; %entry 3695; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3696; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3697; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3698; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3699; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3700; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 3701; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 3702; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3703; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 3704; 
GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3705; GFX12-WGP-NEXT: s_endpgm 3706; 3707; GFX12-CU-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: 3708; GFX12-CU: ; %bb.0: ; %entry 3709; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 3710; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3711; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3712; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3713; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3714; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 3715; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 3716; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3717; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 3718; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3719; GFX12-CU-NEXT: s_endpgm 3720 ptr addrspace(1) %out, i32 %in, i32 %old) { 3721entry: 3722 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 3723 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic 3724 ret void 3725} 3726 3727define amdgpu_kernel void @global_wavefront_monotonic_acquire_cmpxchg( 3728; GFX6-LABEL: global_wavefront_monotonic_acquire_cmpxchg: 3729; GFX6: ; %bb.0: ; %entry 3730; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3731; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3732; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3733; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3734; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3735; GFX6-NEXT: s_mov_b32 s12, s5 3736; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 3737; GFX6-NEXT: s_mov_b32 s10, 0x100f000 3738; GFX6-NEXT: s_mov_b32 s11, -1 3739; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 3740; GFX6-NEXT: s_mov_b32 s5, s12 3741; GFX6-NEXT: s_mov_b32 s6, s11 3742; GFX6-NEXT: s_mov_b32 s7, s10 3743; GFX6-NEXT: v_mov_b32_e32 v0, s9 3744; GFX6-NEXT: v_mov_b32_e32 v2, s8 3745; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3746; GFX6-NEXT: v_mov_b32_e32 v1, v2 3747; GFX6-NEXT: buffer_atomic_cmpswap 
v[0:1], off, s[4:7], 0 offset:16 3748; GFX6-NEXT: s_endpgm 3749; 3750; GFX7-LABEL: global_wavefront_monotonic_acquire_cmpxchg: 3751; GFX7: ; %bb.0: ; %entry 3752; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3753; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3754; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3755; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3756; GFX7-NEXT: s_mov_b64 s[10:11], 16 3757; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3758; GFX7-NEXT: s_mov_b32 s4, s8 3759; GFX7-NEXT: s_mov_b32 s5, s9 3760; GFX7-NEXT: s_mov_b32 s9, s10 3761; GFX7-NEXT: s_mov_b32 s8, s11 3762; GFX7-NEXT: s_add_u32 s4, s4, s9 3763; GFX7-NEXT: s_addc_u32 s8, s5, s8 3764; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3765; GFX7-NEXT: s_mov_b32 s5, s8 3766; GFX7-NEXT: v_mov_b32_e32 v2, s7 3767; GFX7-NEXT: v_mov_b32_e32 v0, s6 3768; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3769; GFX7-NEXT: v_mov_b32_e32 v3, v0 3770; GFX7-NEXT: v_mov_b32_e32 v0, s4 3771; GFX7-NEXT: v_mov_b32_e32 v1, s5 3772; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3773; GFX7-NEXT: s_endpgm 3774; 3775; GFX10-WGP-LABEL: global_wavefront_monotonic_acquire_cmpxchg: 3776; GFX10-WGP: ; %bb.0: ; %entry 3777; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3778; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3779; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 3780; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 3781; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3782; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 3783; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 3784; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3785; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 3786; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3787; GFX10-WGP-NEXT: s_endpgm 3788; 3789; GFX10-CU-LABEL: global_wavefront_monotonic_acquire_cmpxchg: 3790; GFX10-CU: ; %bb.0: ; %entry 3791; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3792; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3793; GFX10-CU-NEXT: s_load_dword s7, 
s[8:9], 0x8 3794; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 3795; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3796; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 3797; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 3798; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3799; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 3800; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 3801; GFX10-CU-NEXT: s_endpgm 3802; 3803; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_acquire_cmpxchg: 3804; SKIP-CACHE-INV: ; %bb.0: ; %entry 3805; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 3806; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 3807; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 3808; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 3809; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3810; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 3811; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 3812; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 3813; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 3814; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 3815; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 3816; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 3817; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 3818; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 3819; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 3820; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3821; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 3822; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 3823; SKIP-CACHE-INV-NEXT: s_endpgm 3824; 3825; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_monotonic_acquire_cmpxchg: 3826; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3827; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3828; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3829; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3830; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3831; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt 
lgkmcnt(0) 3832; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3833; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3834; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3835; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3836; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3837; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3838; 3839; GFX90A-TGSPLIT-LABEL: global_wavefront_monotonic_acquire_cmpxchg: 3840; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3841; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3842; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3843; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3844; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3845; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3846; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3847; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 3848; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3849; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3850; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 3851; GFX90A-TGSPLIT-NEXT: s_endpgm 3852; 3853; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_acquire_cmpxchg: 3854; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3855; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3856; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3857; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3858; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3859; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3860; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3861; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3862; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3863; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3864; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3865; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3866; 3867; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_acquire_cmpxchg: 3868; GFX940-TGSPLIT: ; %bb.0: ; 
%entry 3869; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 3870; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3871; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3872; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3873; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3874; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3875; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 3876; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3877; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 3878; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 3879; GFX940-TGSPLIT-NEXT: s_endpgm 3880; 3881; GFX11-WGP-LABEL: global_wavefront_monotonic_acquire_cmpxchg: 3882; GFX11-WGP: ; %bb.0: ; %entry 3883; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 3884; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3885; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3886; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3887; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3888; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 3889; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 3890; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3891; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 3892; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3893; GFX11-WGP-NEXT: s_endpgm 3894; 3895; GFX11-CU-LABEL: global_wavefront_monotonic_acquire_cmpxchg: 3896; GFX11-CU: ; %bb.0: ; %entry 3897; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 3898; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3899; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3900; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3901; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3902; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 3903; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 3904; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3905; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 3906; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3907; GFX11-CU-NEXT: s_endpgm 3908; 3909; GFX12-WGP-LABEL: 
global_wavefront_monotonic_acquire_cmpxchg: 3910; GFX12-WGP: ; %bb.0: ; %entry 3911; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 3912; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3913; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3914; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3915; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3916; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 3917; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 3918; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3919; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 3920; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3921; GFX12-WGP-NEXT: s_endpgm 3922; 3923; GFX12-CU-LABEL: global_wavefront_monotonic_acquire_cmpxchg: 3924; GFX12-CU: ; %bb.0: ; %entry 3925; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 3926; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3927; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3928; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3929; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3930; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 3931; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 3932; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 3933; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 3934; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 3935; GFX12-CU-NEXT: s_endpgm 3936 ptr addrspace(1) %out, i32 %in, i32 %old) { 3937entry: 3938 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 ; &out[4], the byte offset 16 seen in the checked atomics; the wavefront-scope cmpxchg below (success monotonic, failure acquire) is expected to be followed directly by s_endpgm, with no wait or cache invalidate after the compare-swap 3939 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire 3940 ret void 3941} 3942 3943define amdgpu_kernel void @global_wavefront_acquire_acquire_cmpxchg( 3944; GFX6-LABEL: global_wavefront_acquire_acquire_cmpxchg: 3945; GFX6: ; %bb.0: ; %entry 3946; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 3947; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 3948; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 3949; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 3950; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3951; GFX6-NEXT: s_mov_b32 s12, s5 3952; GFX6-NEXT: ; kill: def $sgpr4 
killed $sgpr4 killed $sgpr4_sgpr5 3953; GFX6-NEXT: s_mov_b32 s10, 0x100f000 3954; GFX6-NEXT: s_mov_b32 s11, -1 3955; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 3956; GFX6-NEXT: s_mov_b32 s5, s12 3957; GFX6-NEXT: s_mov_b32 s6, s11 3958; GFX6-NEXT: s_mov_b32 s7, s10 3959; GFX6-NEXT: v_mov_b32_e32 v0, s9 3960; GFX6-NEXT: v_mov_b32_e32 v2, s8 3961; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3962; GFX6-NEXT: v_mov_b32_e32 v1, v2 3963; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 3964; GFX6-NEXT: s_endpgm 3965; 3966; GFX7-LABEL: global_wavefront_acquire_acquire_cmpxchg: 3967; GFX7: ; %bb.0: ; %entry 3968; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3969; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3970; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3971; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3972; GFX7-NEXT: s_mov_b64 s[10:11], 16 3973; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3974; GFX7-NEXT: s_mov_b32 s4, s8 3975; GFX7-NEXT: s_mov_b32 s5, s9 3976; GFX7-NEXT: s_mov_b32 s9, s10 3977; GFX7-NEXT: s_mov_b32 s8, s11 3978; GFX7-NEXT: s_add_u32 s4, s4, s9 3979; GFX7-NEXT: s_addc_u32 s8, s5, s8 3980; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3981; GFX7-NEXT: s_mov_b32 s5, s8 3982; GFX7-NEXT: v_mov_b32_e32 v2, s7 3983; GFX7-NEXT: v_mov_b32_e32 v0, s6 3984; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3985; GFX7-NEXT: v_mov_b32_e32 v3, v0 3986; GFX7-NEXT: v_mov_b32_e32 v0, s4 3987; GFX7-NEXT: v_mov_b32_e32 v1, s5 3988; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3989; GFX7-NEXT: s_endpgm 3990; 3991; GFX10-WGP-LABEL: global_wavefront_acquire_acquire_cmpxchg: 3992; GFX10-WGP: ; %bb.0: ; %entry 3993; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3994; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3995; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 3996; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 3997; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3998; GFX10-WGP-NEXT: v_mov_b32_e32 
v1, s7 3999; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 4000; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4001; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 4002; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4003; GFX10-WGP-NEXT: s_endpgm 4004; 4005; GFX10-CU-LABEL: global_wavefront_acquire_acquire_cmpxchg: 4006; GFX10-CU: ; %bb.0: ; %entry 4007; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4008; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4009; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 4010; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 4011; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4012; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 4013; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 4014; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4015; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 4016; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4017; GFX10-CU-NEXT: s_endpgm 4018; 4019; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_acquire_cmpxchg: 4020; SKIP-CACHE-INV: ; %bb.0: ; %entry 4021; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 4022; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 4023; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 4024; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 4025; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4026; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 4027; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 4028; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 4029; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 4030; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 4031; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 4032; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 4033; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 4034; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 4035; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 4036; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4037; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 
4038; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 4039; SKIP-CACHE-INV-NEXT: s_endpgm 4040; 4041; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acquire_acquire_cmpxchg: 4042; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4043; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4044; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4045; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4046; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4047; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4048; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4049; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4050; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4051; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4052; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4053; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4054; 4055; GFX90A-TGSPLIT-LABEL: global_wavefront_acquire_acquire_cmpxchg: 4056; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4057; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4058; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4059; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4060; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4061; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4062; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4063; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4064; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4065; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4066; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4067; GFX90A-TGSPLIT-NEXT: s_endpgm 4068; 4069; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_acquire_cmpxchg: 4070; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4071; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4072; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4073; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4074; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4075; 
GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4076; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4077; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4078; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4079; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4080; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4081; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4082; 4083; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_acquire_cmpxchg: 4084; GFX940-TGSPLIT: ; %bb.0: ; %entry 4085; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4086; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4087; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4088; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4089; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4090; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4091; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4092; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4093; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4094; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4095; GFX940-TGSPLIT-NEXT: s_endpgm 4096; 4097; GFX11-WGP-LABEL: global_wavefront_acquire_acquire_cmpxchg: 4098; GFX11-WGP: ; %bb.0: ; %entry 4099; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 4100; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4101; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4102; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4103; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4104; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 4105; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 4106; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4107; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 4108; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4109; GFX11-WGP-NEXT: s_endpgm 4110; 4111; GFX11-CU-LABEL: global_wavefront_acquire_acquire_cmpxchg: 4112; GFX11-CU: ; %bb.0: ; %entry 4113; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 4114; GFX11-CU-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x0 4115; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4116; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4117; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4118; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 4119; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 4120; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4121; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 4122; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4123; GFX11-CU-NEXT: s_endpgm 4124; 4125; GFX12-WGP-LABEL: global_wavefront_acquire_acquire_cmpxchg: 4126; GFX12-WGP: ; %bb.0: ; %entry 4127; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 4128; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4129; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4130; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4131; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4132; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 4133; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 4134; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4135; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 4136; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4137; GFX12-WGP-NEXT: s_endpgm 4138; 4139; GFX12-CU-LABEL: global_wavefront_acquire_acquire_cmpxchg: 4140; GFX12-CU: ; %bb.0: ; %entry 4141; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 4142; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4143; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4144; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4145; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4146; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 4147; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 4148; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4149; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 4150; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4151; GFX12-CU-NEXT: s_endpgm 4152 ptr addrspace(1) %out, i32 %in, i32 %old) { 4153entry: 4154 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 ; &out[4], the byte offset 16 seen in the checked atomics; the wavefront-scope cmpxchg below (success acquire, failure acquire) is expected to be followed directly by s_endpgm, with no wait or cache invalidate after the compare-swap 4155 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acquire 
acquire 4156 ret void 4157} 4158 4159define amdgpu_kernel void @global_wavefront_release_acquire_cmpxchg( 4160; GFX6-LABEL: global_wavefront_release_acquire_cmpxchg: 4161; GFX6: ; %bb.0: ; %entry 4162; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 4163; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 4164; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 4165; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 4166; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4167; GFX6-NEXT: s_mov_b32 s12, s5 4168; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 4169; GFX6-NEXT: s_mov_b32 s10, 0x100f000 4170; GFX6-NEXT: s_mov_b32 s11, -1 4171; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 4172; GFX6-NEXT: s_mov_b32 s5, s12 4173; GFX6-NEXT: s_mov_b32 s6, s11 4174; GFX6-NEXT: s_mov_b32 s7, s10 4175; GFX6-NEXT: v_mov_b32_e32 v0, s9 4176; GFX6-NEXT: v_mov_b32_e32 v2, s8 4177; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4178; GFX6-NEXT: v_mov_b32_e32 v1, v2 4179; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4180; GFX6-NEXT: s_endpgm 4181; 4182; GFX7-LABEL: global_wavefront_release_acquire_cmpxchg: 4183; GFX7: ; %bb.0: ; %entry 4184; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4185; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4186; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4187; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4188; GFX7-NEXT: s_mov_b64 s[10:11], 16 4189; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4190; GFX7-NEXT: s_mov_b32 s4, s8 4191; GFX7-NEXT: s_mov_b32 s5, s9 4192; GFX7-NEXT: s_mov_b32 s9, s10 4193; GFX7-NEXT: s_mov_b32 s8, s11 4194; GFX7-NEXT: s_add_u32 s4, s4, s9 4195; GFX7-NEXT: s_addc_u32 s8, s5, s8 4196; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4197; GFX7-NEXT: s_mov_b32 s5, s8 4198; GFX7-NEXT: v_mov_b32_e32 v2, s7 4199; GFX7-NEXT: v_mov_b32_e32 v0, s6 4200; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4201; GFX7-NEXT: v_mov_b32_e32 v3, v0 4202; GFX7-NEXT: v_mov_b32_e32 v0, s4 4203; 
GFX7-NEXT: v_mov_b32_e32 v1, s5 4204; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4205; GFX7-NEXT: s_endpgm 4206; 4207; GFX10-WGP-LABEL: global_wavefront_release_acquire_cmpxchg: 4208; GFX10-WGP: ; %bb.0: ; %entry 4209; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4210; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4211; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 4212; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 4213; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4214; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 4215; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 4216; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4217; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 4218; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4219; GFX10-WGP-NEXT: s_endpgm 4220; 4221; GFX10-CU-LABEL: global_wavefront_release_acquire_cmpxchg: 4222; GFX10-CU: ; %bb.0: ; %entry 4223; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4224; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4225; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 4226; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 4227; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4228; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 4229; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 4230; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4231; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 4232; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4233; GFX10-CU-NEXT: s_endpgm 4234; 4235; SKIP-CACHE-INV-LABEL: global_wavefront_release_acquire_cmpxchg: 4236; SKIP-CACHE-INV: ; %bb.0: ; %entry 4237; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 4238; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 4239; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 4240; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 4241; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4242; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 4243; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 4244; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 
4245; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 4246; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 4247; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 4248; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 4249; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 4250; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 4251; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 4252; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4253; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 4254; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 4255; SKIP-CACHE-INV-NEXT: s_endpgm 4256; 4257; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_release_acquire_cmpxchg: 4258; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4259; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4260; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4261; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4262; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4263; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4264; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4265; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4266; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4267; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4268; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4269; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4270; 4271; GFX90A-TGSPLIT-LABEL: global_wavefront_release_acquire_cmpxchg: 4272; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4273; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4274; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4275; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4276; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4277; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4278; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4279; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4280; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4281; GFX90A-TGSPLIT-NEXT: 
v_mov_b32_e32 v3, v1 4282; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4283; GFX90A-TGSPLIT-NEXT: s_endpgm 4284; 4285; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_acquire_cmpxchg: 4286; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4287; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4288; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4289; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4290; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4291; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4292; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4293; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4294; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4295; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4296; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4297; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4298; 4299; GFX940-TGSPLIT-LABEL: global_wavefront_release_acquire_cmpxchg: 4300; GFX940-TGSPLIT: ; %bb.0: ; %entry 4301; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4302; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4303; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4304; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4305; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4306; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4307; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4308; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4309; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4310; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4311; GFX940-TGSPLIT-NEXT: s_endpgm 4312; 4313; GFX11-WGP-LABEL: global_wavefront_release_acquire_cmpxchg: 4314; GFX11-WGP: ; %bb.0: ; %entry 4315; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 4316; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4317; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4318; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4319; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4320; 
GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 4321; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 4322; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4323; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 4324; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4325; GFX11-WGP-NEXT: s_endpgm 4326; 4327; GFX11-CU-LABEL: global_wavefront_release_acquire_cmpxchg: 4328; GFX11-CU: ; %bb.0: ; %entry 4329; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 4330; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4331; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4332; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4333; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4334; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 4335; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 4336; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4337; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 4338; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4339; GFX11-CU-NEXT: s_endpgm 4340; 4341; GFX12-WGP-LABEL: global_wavefront_release_acquire_cmpxchg: 4342; GFX12-WGP: ; %bb.0: ; %entry 4343; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 4344; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4345; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4346; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4347; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4348; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 4349; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 4350; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4351; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 4352; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4353; GFX12-WGP-NEXT: s_endpgm 4354; 4355; GFX12-CU-LABEL: global_wavefront_release_acquire_cmpxchg: 4356; GFX12-CU: ; %bb.0: ; %entry 4357; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 4358; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4359; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4360; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4361; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4362; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 
4363; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 4364; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4365; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 4366; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4367; GFX12-CU-NEXT: s_endpgm 4368 ptr addrspace(1) %out, i32 %in, i32 %old) { 4369entry: 4370 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 ; &out[4], the byte offset 16 seen in the checked atomics; the wavefront-scope cmpxchg below (success release, failure acquire) is expected to be followed directly by s_endpgm, with no wait or cache invalidate after the compare-swap 4371 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") release acquire 4372 ret void 4373} 4374 4375define amdgpu_kernel void @global_wavefront_acq_rel_acquire_cmpxchg( 4376; GFX6-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: 4377; GFX6: ; %bb.0: ; %entry 4378; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 4379; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 4380; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 4381; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 4382; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4383; GFX6-NEXT: s_mov_b32 s12, s5 4384; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 4385; GFX6-NEXT: s_mov_b32 s10, 0x100f000 4386; GFX6-NEXT: s_mov_b32 s11, -1 4387; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 4388; GFX6-NEXT: s_mov_b32 s5, s12 4389; GFX6-NEXT: s_mov_b32 s6, s11 4390; GFX6-NEXT: s_mov_b32 s7, s10 4391; GFX6-NEXT: v_mov_b32_e32 v0, s9 4392; GFX6-NEXT: v_mov_b32_e32 v2, s8 4393; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4394; GFX6-NEXT: v_mov_b32_e32 v1, v2 4395; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4396; GFX6-NEXT: s_endpgm 4397; 4398; GFX7-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: 4399; GFX7: ; %bb.0: ; %entry 4400; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4401; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4402; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4403; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4404; GFX7-NEXT: s_mov_b64 s[10:11], 16 4405; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4406; GFX7-NEXT: s_mov_b32 s4, s8 4407; GFX7-NEXT: s_mov_b32 s5, 
s9 4408; GFX7-NEXT: s_mov_b32 s9, s10 4409; GFX7-NEXT: s_mov_b32 s8, s11 4410; GFX7-NEXT: s_add_u32 s4, s4, s9 4411; GFX7-NEXT: s_addc_u32 s8, s5, s8 4412; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4413; GFX7-NEXT: s_mov_b32 s5, s8 4414; GFX7-NEXT: v_mov_b32_e32 v2, s7 4415; GFX7-NEXT: v_mov_b32_e32 v0, s6 4416; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4417; GFX7-NEXT: v_mov_b32_e32 v3, v0 4418; GFX7-NEXT: v_mov_b32_e32 v0, s4 4419; GFX7-NEXT: v_mov_b32_e32 v1, s5 4420; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4421; GFX7-NEXT: s_endpgm 4422; 4423; GFX10-WGP-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: 4424; GFX10-WGP: ; %bb.0: ; %entry 4425; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4426; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4427; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 4428; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 4429; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4430; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 4431; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 4432; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4433; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 4434; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4435; GFX10-WGP-NEXT: s_endpgm 4436; 4437; GFX10-CU-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: 4438; GFX10-CU: ; %bb.0: ; %entry 4439; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4440; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4441; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 4442; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 4443; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4444; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 4445; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 4446; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4447; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 4448; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4449; GFX10-CU-NEXT: s_endpgm 4450; 4451; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: 
4452; SKIP-CACHE-INV: ; %bb.0: ; %entry 4453; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 4454; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 4455; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 4456; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 4457; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4458; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 4459; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 4460; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 4461; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 4462; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 4463; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 4464; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 4465; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 4466; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 4467; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 4468; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4469; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 4470; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 4471; SKIP-CACHE-INV-NEXT: s_endpgm 4472; 4473; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: 4474; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4475; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4476; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4477; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4478; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4479; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4480; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4481; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4482; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4483; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4484; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4485; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4486; 4487; GFX90A-TGSPLIT-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: 4488; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4489; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4490; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4491; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4492; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4493; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4494; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4495; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4496; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4497; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4498; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4499; GFX90A-TGSPLIT-NEXT: s_endpgm 4500; 4501; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: 4502; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4503; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4504; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4505; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4506; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4507; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4508; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4509; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4510; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4511; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4512; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4513; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4514; 4515; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: 4516; GFX940-TGSPLIT: ; %bb.0: ; %entry 4517; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4518; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4519; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4520; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4521; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4522; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4523; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4524; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4525; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, 
v1 4526; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4527; GFX940-TGSPLIT-NEXT: s_endpgm 4528; 4529; GFX11-WGP-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: 4530; GFX11-WGP: ; %bb.0: ; %entry 4531; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 4532; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4533; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4534; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4535; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4536; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 4537; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 4538; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4539; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 4540; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4541; GFX11-WGP-NEXT: s_endpgm 4542; 4543; GFX11-CU-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: 4544; GFX11-CU: ; %bb.0: ; %entry 4545; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 4546; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4547; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4548; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4549; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4550; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 4551; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 4552; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4553; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 4554; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4555; GFX11-CU-NEXT: s_endpgm 4556; 4557; GFX12-WGP-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: 4558; GFX12-WGP: ; %bb.0: ; %entry 4559; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 4560; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4561; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4562; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4563; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4564; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 4565; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 4566; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4567; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 4568; 
GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4569; GFX12-WGP-NEXT: s_endpgm 4570; 4571; GFX12-CU-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: 4572; GFX12-CU: ; %bb.0: ; %entry 4573; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 4574; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4575; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4576; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4577; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4578; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 4579; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 4580; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4581; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 4582; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4583; GFX12-CU-NEXT: s_endpgm 4584 ptr addrspace(1) %out, i32 %in, i32 %old) { 4585entry: 4586 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 4587 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire 4588 ret void 4589} 4590 4591define amdgpu_kernel void @global_wavefront_seq_cst_acquire_cmpxchg( 4592; GFX6-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: 4593; GFX6: ; %bb.0: ; %entry 4594; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 4595; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 4596; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 4597; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 4598; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4599; GFX6-NEXT: s_mov_b32 s12, s5 4600; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 4601; GFX6-NEXT: s_mov_b32 s10, 0x100f000 4602; GFX6-NEXT: s_mov_b32 s11, -1 4603; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 4604; GFX6-NEXT: s_mov_b32 s5, s12 4605; GFX6-NEXT: s_mov_b32 s6, s11 4606; GFX6-NEXT: s_mov_b32 s7, s10 4607; GFX6-NEXT: v_mov_b32_e32 v0, s9 4608; GFX6-NEXT: v_mov_b32_e32 v2, s8 4609; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4610; GFX6-NEXT: v_mov_b32_e32 v1, v2 4611; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], 
off, s[4:7], 0 offset:16 4612; GFX6-NEXT: s_endpgm 4613; 4614; GFX7-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: 4615; GFX7: ; %bb.0: ; %entry 4616; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4617; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4618; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4619; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4620; GFX7-NEXT: s_mov_b64 s[10:11], 16 4621; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4622; GFX7-NEXT: s_mov_b32 s4, s8 4623; GFX7-NEXT: s_mov_b32 s5, s9 4624; GFX7-NEXT: s_mov_b32 s9, s10 4625; GFX7-NEXT: s_mov_b32 s8, s11 4626; GFX7-NEXT: s_add_u32 s4, s4, s9 4627; GFX7-NEXT: s_addc_u32 s8, s5, s8 4628; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4629; GFX7-NEXT: s_mov_b32 s5, s8 4630; GFX7-NEXT: v_mov_b32_e32 v2, s7 4631; GFX7-NEXT: v_mov_b32_e32 v0, s6 4632; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4633; GFX7-NEXT: v_mov_b32_e32 v3, v0 4634; GFX7-NEXT: v_mov_b32_e32 v0, s4 4635; GFX7-NEXT: v_mov_b32_e32 v1, s5 4636; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4637; GFX7-NEXT: s_endpgm 4638; 4639; GFX10-WGP-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: 4640; GFX10-WGP: ; %bb.0: ; %entry 4641; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4642; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4643; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 4644; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 4645; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4646; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 4647; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 4648; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4649; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 4650; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4651; GFX10-WGP-NEXT: s_endpgm 4652; 4653; GFX10-CU-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: 4654; GFX10-CU: ; %bb.0: ; %entry 4655; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4656; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4657; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 4658; 
GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 4659; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4660; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 4661; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 4662; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4663; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 4664; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4665; GFX10-CU-NEXT: s_endpgm 4666; 4667; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: 4668; SKIP-CACHE-INV: ; %bb.0: ; %entry 4669; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 4670; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 4671; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 4672; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 4673; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4674; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 4675; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 4676; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 4677; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 4678; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 4679; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 4680; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 4681; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 4682; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 4683; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 4684; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4685; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 4686; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 4687; SKIP-CACHE-INV-NEXT: s_endpgm 4688; 4689; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: 4690; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4691; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4692; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4693; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4694; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4695; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4696; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4697; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4698; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4699; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4700; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4701; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4702; 4703; GFX90A-TGSPLIT-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: 4704; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4705; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4706; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4707; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4708; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4709; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4710; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4711; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4712; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4713; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4714; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4715; GFX90A-TGSPLIT-NEXT: s_endpgm 4716; 4717; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: 4718; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4719; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4720; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4721; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4722; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4723; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4724; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4725; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4726; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4727; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4728; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4729; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4730; 4731; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: 4732; GFX940-TGSPLIT: ; %bb.0: ; %entry 4733; 
GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4734; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4735; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4736; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4737; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4738; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4739; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4740; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4741; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4742; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4743; GFX940-TGSPLIT-NEXT: s_endpgm 4744; 4745; GFX11-WGP-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: 4746; GFX11-WGP: ; %bb.0: ; %entry 4747; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 4748; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4749; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4750; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4751; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4752; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 4753; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 4754; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4755; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 4756; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4757; GFX11-WGP-NEXT: s_endpgm 4758; 4759; GFX11-CU-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: 4760; GFX11-CU: ; %bb.0: ; %entry 4761; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 4762; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4763; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4764; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4765; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4766; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 4767; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 4768; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4769; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 4770; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4771; GFX11-CU-NEXT: s_endpgm 4772; 4773; GFX12-WGP-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: 
4774; GFX12-WGP: ; %bb.0: ; %entry 4775; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 4776; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4777; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4778; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4779; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4780; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 4781; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 4782; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4783; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 4784; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4785; GFX12-WGP-NEXT: s_endpgm 4786; 4787; GFX12-CU-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: 4788; GFX12-CU: ; %bb.0: ; %entry 4789; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 4790; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4791; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4792; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4793; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4794; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 4795; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 4796; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4797; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 4798; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4799; GFX12-CU-NEXT: s_endpgm 4800 ptr addrspace(1) %out, i32 %in, i32 %old) { 4801entry: 4802 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 4803 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire 4804 ret void 4805} 4806 4807define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_cmpxchg( 4808; GFX6-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: 4809; GFX6: ; %bb.0: ; %entry 4810; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 4811; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 4812; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 4813; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 4814; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4815; GFX6-NEXT: s_mov_b32 s12, s5 4816; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 4817; 
GFX6-NEXT: s_mov_b32 s10, 0x100f000 4818; GFX6-NEXT: s_mov_b32 s11, -1 4819; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 4820; GFX6-NEXT: s_mov_b32 s5, s12 4821; GFX6-NEXT: s_mov_b32 s6, s11 4822; GFX6-NEXT: s_mov_b32 s7, s10 4823; GFX6-NEXT: v_mov_b32_e32 v0, s9 4824; GFX6-NEXT: v_mov_b32_e32 v2, s8 4825; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4826; GFX6-NEXT: v_mov_b32_e32 v1, v2 4827; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4828; GFX6-NEXT: s_endpgm 4829; 4830; GFX7-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: 4831; GFX7: ; %bb.0: ; %entry 4832; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4833; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4834; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4835; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4836; GFX7-NEXT: s_mov_b64 s[10:11], 16 4837; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4838; GFX7-NEXT: s_mov_b32 s4, s8 4839; GFX7-NEXT: s_mov_b32 s5, s9 4840; GFX7-NEXT: s_mov_b32 s9, s10 4841; GFX7-NEXT: s_mov_b32 s8, s11 4842; GFX7-NEXT: s_add_u32 s4, s4, s9 4843; GFX7-NEXT: s_addc_u32 s8, s5, s8 4844; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4845; GFX7-NEXT: s_mov_b32 s5, s8 4846; GFX7-NEXT: v_mov_b32_e32 v2, s7 4847; GFX7-NEXT: v_mov_b32_e32 v0, s6 4848; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4849; GFX7-NEXT: v_mov_b32_e32 v3, v0 4850; GFX7-NEXT: v_mov_b32_e32 v0, s4 4851; GFX7-NEXT: v_mov_b32_e32 v1, s5 4852; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4853; GFX7-NEXT: s_endpgm 4854; 4855; GFX10-WGP-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: 4856; GFX10-WGP: ; %bb.0: ; %entry 4857; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 4858; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4859; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 4860; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 4861; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4862; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 4863; GFX10-WGP-NEXT: 
v_mov_b32_e32 v3, s6 4864; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4865; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 4866; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4867; GFX10-WGP-NEXT: s_endpgm 4868; 4869; GFX10-CU-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: 4870; GFX10-CU: ; %bb.0: ; %entry 4871; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 4872; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4873; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 4874; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 4875; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4876; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 4877; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 4878; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4879; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 4880; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 4881; GFX10-CU-NEXT: s_endpgm 4882; 4883; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: 4884; SKIP-CACHE-INV: ; %bb.0: ; %entry 4885; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 4886; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 4887; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 4888; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 4889; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4890; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 4891; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 4892; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 4893; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 4894; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 4895; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 4896; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 4897; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 4898; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 4899; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 4900; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4901; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 4902; SKIP-CACHE-INV-NEXT: 
buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 4903; SKIP-CACHE-INV-NEXT: s_endpgm 4904; 4905; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: 4906; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4907; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4908; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4909; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4910; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4911; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4912; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4913; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4914; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4915; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4916; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4917; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4918; 4919; GFX90A-TGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: 4920; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4921; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4922; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4923; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4924; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4925; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4926; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4927; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 4928; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4929; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4930; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 4931; GFX90A-TGSPLIT-NEXT: s_endpgm 4932; 4933; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: 4934; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4935; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4936; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4937; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4938; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4939; GFX940-NOTTGSPLIT-NEXT: s_waitcnt 
lgkmcnt(0) 4940; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4941; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4942; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4943; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4944; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4945; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4946; 4947; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: 4948; GFX940-TGSPLIT: ; %bb.0: ; %entry 4949; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 4950; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4951; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4952; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4953; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4954; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4955; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 4956; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4957; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 4958; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 4959; GFX940-TGSPLIT-NEXT: s_endpgm 4960; 4961; GFX11-WGP-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: 4962; GFX11-WGP: ; %bb.0: ; %entry 4963; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 4964; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4965; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4966; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4967; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4968; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 4969; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 4970; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4971; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 4972; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4973; GFX11-WGP-NEXT: s_endpgm 4974; 4975; GFX11-CU-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: 4976; GFX11-CU: ; %bb.0: ; %entry 4977; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 4978; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4979; GFX11-CU-NEXT: 
s_load_b32 s3, s[4:5], 0x8 4980; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4981; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4982; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 4983; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 4984; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4985; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 4986; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 4987; GFX11-CU-NEXT: s_endpgm 4988; 4989; GFX12-WGP-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: 4990; GFX12-WGP: ; %bb.0: ; %entry 4991; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 4992; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4993; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4994; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4995; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4996; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 4997; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 4998; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 4999; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 5000; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5001; GFX12-WGP-NEXT: s_endpgm 5002; 5003; GFX12-CU-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: 5004; GFX12-CU: ; %bb.0: ; %entry 5005; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 5006; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5007; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5008; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5009; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5010; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 5011; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 5012; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5013; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 5014; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5015; GFX12-CU-NEXT: s_endpgm 5016 ptr addrspace(1) %out, i32 %in, i32 %old) { 5017entry: 5018 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 5019 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst 5020 ret void 5021} 5022 
5023define amdgpu_kernel void @global_wavefront_acquire_seq_cst_cmpxchg( 5024; GFX6-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: 5025; GFX6: ; %bb.0: ; %entry 5026; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 5027; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5028; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 5029; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 5030; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5031; GFX6-NEXT: s_mov_b32 s12, s5 5032; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 5033; GFX6-NEXT: s_mov_b32 s10, 0x100f000 5034; GFX6-NEXT: s_mov_b32 s11, -1 5035; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 5036; GFX6-NEXT: s_mov_b32 s5, s12 5037; GFX6-NEXT: s_mov_b32 s6, s11 5038; GFX6-NEXT: s_mov_b32 s7, s10 5039; GFX6-NEXT: v_mov_b32_e32 v0, s9 5040; GFX6-NEXT: v_mov_b32_e32 v2, s8 5041; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5042; GFX6-NEXT: v_mov_b32_e32 v1, v2 5043; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5044; GFX6-NEXT: s_endpgm 5045; 5046; GFX7-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: 5047; GFX7: ; %bb.0: ; %entry 5048; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5049; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5050; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5051; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5052; GFX7-NEXT: s_mov_b64 s[10:11], 16 5053; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5054; GFX7-NEXT: s_mov_b32 s4, s8 5055; GFX7-NEXT: s_mov_b32 s5, s9 5056; GFX7-NEXT: s_mov_b32 s9, s10 5057; GFX7-NEXT: s_mov_b32 s8, s11 5058; GFX7-NEXT: s_add_u32 s4, s4, s9 5059; GFX7-NEXT: s_addc_u32 s8, s5, s8 5060; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5061; GFX7-NEXT: s_mov_b32 s5, s8 5062; GFX7-NEXT: v_mov_b32_e32 v2, s7 5063; GFX7-NEXT: v_mov_b32_e32 v0, s6 5064; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5065; GFX7-NEXT: v_mov_b32_e32 v3, v0 5066; GFX7-NEXT: v_mov_b32_e32 v0, s4 5067; GFX7-NEXT: v_mov_b32_e32 v1, s5 5068; 
GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5069; GFX7-NEXT: s_endpgm 5070; 5071; GFX10-WGP-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: 5072; GFX10-WGP: ; %bb.0: ; %entry 5073; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5074; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5075; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 5076; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 5077; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5078; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 5079; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 5080; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5081; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 5082; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5083; GFX10-WGP-NEXT: s_endpgm 5084; 5085; GFX10-CU-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: 5086; GFX10-CU: ; %bb.0: ; %entry 5087; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5088; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5089; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 5090; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 5091; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5092; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 5093; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 5094; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5095; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 5096; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5097; GFX10-CU-NEXT: s_endpgm 5098; 5099; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: 5100; SKIP-CACHE-INV: ; %bb.0: ; %entry 5101; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 5102; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 5103; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 5104; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 5105; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5106; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 5107; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 5108; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 5109; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 
-1 5110; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 5111; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 5112; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 5113; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 5114; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 5115; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 5116; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5117; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 5118; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5119; SKIP-CACHE-INV-NEXT: s_endpgm 5120; 5121; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: 5122; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5123; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5124; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5125; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5126; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5127; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5128; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5129; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5130; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5131; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5132; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5133; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5134; 5135; GFX90A-TGSPLIT-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: 5136; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5137; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5138; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5139; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5140; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5141; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5142; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5143; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5144; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5145; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5146; GFX90A-TGSPLIT-NEXT: 
global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5147; GFX90A-TGSPLIT-NEXT: s_endpgm 5148; 5149; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: 5150; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5151; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5152; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5153; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5154; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5155; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5156; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5157; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5158; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5159; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5160; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5161; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5162; 5163; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: 5164; GFX940-TGSPLIT: ; %bb.0: ; %entry 5165; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5166; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5167; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5168; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5169; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5170; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5171; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5172; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5173; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5174; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5175; GFX940-TGSPLIT-NEXT: s_endpgm 5176; 5177; GFX11-WGP-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: 5178; GFX11-WGP: ; %bb.0: ; %entry 5179; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 5180; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5181; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5182; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5183; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5184; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 5185; GFX11-WGP-NEXT: 
v_mov_b32_e32 v3, s2 5186; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5187; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 5188; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5189; GFX11-WGP-NEXT: s_endpgm 5190; 5191; GFX11-CU-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: 5192; GFX11-CU: ; %bb.0: ; %entry 5193; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 5194; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5195; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5196; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5197; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5198; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 5199; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 5200; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5201; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 5202; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5203; GFX11-CU-NEXT: s_endpgm 5204; 5205; GFX12-WGP-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: 5206; GFX12-WGP: ; %bb.0: ; %entry 5207; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 5208; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5209; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5210; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5211; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5212; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 5213; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 5214; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5215; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 5216; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5217; GFX12-WGP-NEXT: s_endpgm 5218; 5219; GFX12-CU-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: 5220; GFX12-CU: ; %bb.0: ; %entry 5221; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 5222; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5223; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5224; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5225; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5226; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 5227; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 5228; 
GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5229; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 5230; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5231; GFX12-CU-NEXT: s_endpgm 5232 ptr addrspace(1) %out, i32 %in, i32 %old) { 5233entry: 5234 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 5235 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst 5236 ret void 5237} 5238 5239define amdgpu_kernel void @global_wavefront_release_seq_cst_cmpxchg( 5240; GFX6-LABEL: global_wavefront_release_seq_cst_cmpxchg: 5241; GFX6: ; %bb.0: ; %entry 5242; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 5243; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5244; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 5245; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 5246; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5247; GFX6-NEXT: s_mov_b32 s12, s5 5248; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 5249; GFX6-NEXT: s_mov_b32 s10, 0x100f000 5250; GFX6-NEXT: s_mov_b32 s11, -1 5251; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 5252; GFX6-NEXT: s_mov_b32 s5, s12 5253; GFX6-NEXT: s_mov_b32 s6, s11 5254; GFX6-NEXT: s_mov_b32 s7, s10 5255; GFX6-NEXT: v_mov_b32_e32 v0, s9 5256; GFX6-NEXT: v_mov_b32_e32 v2, s8 5257; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5258; GFX6-NEXT: v_mov_b32_e32 v1, v2 5259; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5260; GFX6-NEXT: s_endpgm 5261; 5262; GFX7-LABEL: global_wavefront_release_seq_cst_cmpxchg: 5263; GFX7: ; %bb.0: ; %entry 5264; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5265; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5266; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5267; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5268; GFX7-NEXT: s_mov_b64 s[10:11], 16 5269; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5270; GFX7-NEXT: s_mov_b32 s4, s8 5271; GFX7-NEXT: s_mov_b32 s5, s9 5272; GFX7-NEXT: s_mov_b32 s9, s10 5273; 
GFX7-NEXT: s_mov_b32 s8, s11 5274; GFX7-NEXT: s_add_u32 s4, s4, s9 5275; GFX7-NEXT: s_addc_u32 s8, s5, s8 5276; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5277; GFX7-NEXT: s_mov_b32 s5, s8 5278; GFX7-NEXT: v_mov_b32_e32 v2, s7 5279; GFX7-NEXT: v_mov_b32_e32 v0, s6 5280; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5281; GFX7-NEXT: v_mov_b32_e32 v3, v0 5282; GFX7-NEXT: v_mov_b32_e32 v0, s4 5283; GFX7-NEXT: v_mov_b32_e32 v1, s5 5284; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5285; GFX7-NEXT: s_endpgm 5286; 5287; GFX10-WGP-LABEL: global_wavefront_release_seq_cst_cmpxchg: 5288; GFX10-WGP: ; %bb.0: ; %entry 5289; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5290; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5291; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 5292; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 5293; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5294; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 5295; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 5296; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5297; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 5298; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5299; GFX10-WGP-NEXT: s_endpgm 5300; 5301; GFX10-CU-LABEL: global_wavefront_release_seq_cst_cmpxchg: 5302; GFX10-CU: ; %bb.0: ; %entry 5303; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5304; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5305; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 5306; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 5307; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5308; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 5309; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 5310; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5311; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 5312; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5313; GFX10-CU-NEXT: s_endpgm 5314; 5315; SKIP-CACHE-INV-LABEL: global_wavefront_release_seq_cst_cmpxchg: 5316; SKIP-CACHE-INV: ; %bb.0: ; %entry 5317; 
SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 5318; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 5319; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 5320; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 5321; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5322; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 5323; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 5324; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 5325; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 5326; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 5327; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 5328; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 5329; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 5330; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 5331; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 5332; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5333; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 5334; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5335; SKIP-CACHE-INV-NEXT: s_endpgm 5336; 5337; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_release_seq_cst_cmpxchg: 5338; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5339; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5340; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5341; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5342; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5343; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5344; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5345; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5346; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5347; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5348; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5349; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5350; 5351; GFX90A-TGSPLIT-LABEL: global_wavefront_release_seq_cst_cmpxchg: 5352; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5353; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5354; 
GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5355; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5356; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5357; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5358; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5359; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5360; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5361; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5362; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5363; GFX90A-TGSPLIT-NEXT: s_endpgm 5364; 5365; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_seq_cst_cmpxchg: 5366; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5367; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5368; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5369; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5370; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5371; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5372; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5373; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5374; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5375; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5376; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5377; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5378; 5379; GFX940-TGSPLIT-LABEL: global_wavefront_release_seq_cst_cmpxchg: 5380; GFX940-TGSPLIT: ; %bb.0: ; %entry 5381; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5382; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5383; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5384; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5385; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5386; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5387; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5388; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5389; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5390; GFX940-TGSPLIT-NEXT: 
global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5391; GFX940-TGSPLIT-NEXT: s_endpgm 5392; 5393; GFX11-WGP-LABEL: global_wavefront_release_seq_cst_cmpxchg: 5394; GFX11-WGP: ; %bb.0: ; %entry 5395; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 5396; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5397; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5398; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5399; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5400; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 5401; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 5402; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5403; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 5404; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5405; GFX11-WGP-NEXT: s_endpgm 5406; 5407; GFX11-CU-LABEL: global_wavefront_release_seq_cst_cmpxchg: 5408; GFX11-CU: ; %bb.0: ; %entry 5409; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 5410; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5411; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5412; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5413; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5414; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 5415; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 5416; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5417; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 5418; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5419; GFX11-CU-NEXT: s_endpgm 5420; 5421; GFX12-WGP-LABEL: global_wavefront_release_seq_cst_cmpxchg: 5422; GFX12-WGP: ; %bb.0: ; %entry 5423; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 5424; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5425; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5426; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5427; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5428; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 5429; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 5430; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5431; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 5432; GFX12-WGP-NEXT: 
global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5433; GFX12-WGP-NEXT: s_endpgm 5434; 5435; GFX12-CU-LABEL: global_wavefront_release_seq_cst_cmpxchg: 5436; GFX12-CU: ; %bb.0: ; %entry 5437; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 5438; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5439; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5440; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5441; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5442; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 5443; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 5444; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5445; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 5446; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5447; GFX12-CU-NEXT: s_endpgm 5448 ptr addrspace(1) %out, i32 %in, i32 %old) { 5449entry: 5450 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 5451 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst 5452 ret void 5453} 5454 5455define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_cmpxchg( 5456; GFX6-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: 5457; GFX6: ; %bb.0: ; %entry 5458; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 5459; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5460; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 5461; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 5462; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5463; GFX6-NEXT: s_mov_b32 s12, s5 5464; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 5465; GFX6-NEXT: s_mov_b32 s10, 0x100f000 5466; GFX6-NEXT: s_mov_b32 s11, -1 5467; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 5468; GFX6-NEXT: s_mov_b32 s5, s12 5469; GFX6-NEXT: s_mov_b32 s6, s11 5470; GFX6-NEXT: s_mov_b32 s7, s10 5471; GFX6-NEXT: v_mov_b32_e32 v0, s9 5472; GFX6-NEXT: v_mov_b32_e32 v2, s8 5473; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5474; GFX6-NEXT: v_mov_b32_e32 v1, v2 5475; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 
offset:16 5476; GFX6-NEXT: s_endpgm 5477; 5478; GFX7-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: 5479; GFX7: ; %bb.0: ; %entry 5480; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5481; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5482; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5483; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5484; GFX7-NEXT: s_mov_b64 s[10:11], 16 5485; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5486; GFX7-NEXT: s_mov_b32 s4, s8 5487; GFX7-NEXT: s_mov_b32 s5, s9 5488; GFX7-NEXT: s_mov_b32 s9, s10 5489; GFX7-NEXT: s_mov_b32 s8, s11 5490; GFX7-NEXT: s_add_u32 s4, s4, s9 5491; GFX7-NEXT: s_addc_u32 s8, s5, s8 5492; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5493; GFX7-NEXT: s_mov_b32 s5, s8 5494; GFX7-NEXT: v_mov_b32_e32 v2, s7 5495; GFX7-NEXT: v_mov_b32_e32 v0, s6 5496; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5497; GFX7-NEXT: v_mov_b32_e32 v3, v0 5498; GFX7-NEXT: v_mov_b32_e32 v0, s4 5499; GFX7-NEXT: v_mov_b32_e32 v1, s5 5500; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5501; GFX7-NEXT: s_endpgm 5502; 5503; GFX10-WGP-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: 5504; GFX10-WGP: ; %bb.0: ; %entry 5505; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5506; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5507; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 5508; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 5509; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5510; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 5511; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 5512; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5513; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 5514; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5515; GFX10-WGP-NEXT: s_endpgm 5516; 5517; GFX10-CU-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: 5518; GFX10-CU: ; %bb.0: ; %entry 5519; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5520; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5521; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 5522; GFX10-CU-NEXT: 
s_load_dword s6, s[8:9], 0xc 5523; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5524; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 5525; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 5526; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5527; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 5528; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5529; GFX10-CU-NEXT: s_endpgm 5530; 5531; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: 5532; SKIP-CACHE-INV: ; %bb.0: ; %entry 5533; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 5534; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 5535; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 5536; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 5537; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5538; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 5539; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 5540; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 5541; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 5542; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 5543; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 5544; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 5545; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 5546; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 5547; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 5548; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5549; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 5550; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 5551; SKIP-CACHE-INV-NEXT: s_endpgm 5552; 5553; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: 5554; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5555; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5556; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5557; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5558; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5559; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5560; GFX90A-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v2, s7 5561; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5562; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5563; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5564; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5565; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5566; 5567; GFX90A-TGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: 5568; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5569; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5570; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5571; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5572; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5573; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5574; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5575; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5576; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5577; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5578; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5579; GFX90A-TGSPLIT-NEXT: s_endpgm 5580; 5581; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: 5582; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5583; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5584; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5585; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5586; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5587; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5588; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5589; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5590; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5591; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5592; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5593; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5594; 5595; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: 5596; GFX940-TGSPLIT: ; %bb.0: ; %entry 5597; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 
v0, 0 5598; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5599; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5600; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5601; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5602; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5603; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5604; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5605; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5606; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5607; GFX940-TGSPLIT-NEXT: s_endpgm 5608; 5609; GFX11-WGP-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: 5610; GFX11-WGP: ; %bb.0: ; %entry 5611; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 5612; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5613; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5614; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5615; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5616; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 5617; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 5618; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5619; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 5620; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5621; GFX11-WGP-NEXT: s_endpgm 5622; 5623; GFX11-CU-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: 5624; GFX11-CU: ; %bb.0: ; %entry 5625; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 5626; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5627; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5628; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5629; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5630; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 5631; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 5632; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5633; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 5634; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5635; GFX11-CU-NEXT: s_endpgm 5636; 5637; GFX12-WGP-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: 5638; GFX12-WGP: ; %bb.0: ; %entry 
5639; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 5640; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5641; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5642; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5643; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5644; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 5645; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 5646; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5647; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 5648; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5649; GFX12-WGP-NEXT: s_endpgm 5650; 5651; GFX12-CU-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: 5652; GFX12-CU: ; %bb.0: ; %entry 5653; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 5654; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5655; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5656; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5657; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5658; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 5659; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 5660; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5661; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 5662; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5663; GFX12-CU-NEXT: s_endpgm 5664 ptr addrspace(1) %out, i32 %in, i32 %old) { 5665entry: 5666 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 5667 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst 5668 ret void 5669} 5670 5671define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_cmpxchg( 5672; GFX6-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: 5673; GFX6: ; %bb.0: ; %entry 5674; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 5675; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5676; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 5677; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 5678; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5679; GFX6-NEXT: s_mov_b32 s12, s5 5680; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 5681; GFX6-NEXT: s_mov_b32 s10, 0x100f000 5682; 
GFX6-NEXT: s_mov_b32 s11, -1 5683; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 5684; GFX6-NEXT: s_mov_b32 s5, s12 5685; GFX6-NEXT: s_mov_b32 s6, s11 5686; GFX6-NEXT: s_mov_b32 s7, s10 5687; GFX6-NEXT: v_mov_b32_e32 v0, s9 5688; GFX6-NEXT: v_mov_b32_e32 v2, s8 5689; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5690; GFX6-NEXT: v_mov_b32_e32 v1, v2 5691; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 5692; GFX6-NEXT: s_endpgm 5693; 5694; GFX7-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: 5695; GFX7: ; %bb.0: ; %entry 5696; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5697; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5698; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5699; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5700; GFX7-NEXT: s_mov_b64 s[10:11], 16 5701; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5702; GFX7-NEXT: s_mov_b32 s4, s8 5703; GFX7-NEXT: s_mov_b32 s5, s9 5704; GFX7-NEXT: s_mov_b32 s9, s10 5705; GFX7-NEXT: s_mov_b32 s8, s11 5706; GFX7-NEXT: s_add_u32 s4, s4, s9 5707; GFX7-NEXT: s_addc_u32 s8, s5, s8 5708; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5709; GFX7-NEXT: s_mov_b32 s5, s8 5710; GFX7-NEXT: v_mov_b32_e32 v2, s7 5711; GFX7-NEXT: v_mov_b32_e32 v0, s6 5712; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5713; GFX7-NEXT: v_mov_b32_e32 v3, v0 5714; GFX7-NEXT: v_mov_b32_e32 v0, s4 5715; GFX7-NEXT: v_mov_b32_e32 v1, s5 5716; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5717; GFX7-NEXT: s_endpgm 5718; 5719; GFX10-WGP-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: 5720; GFX10-WGP: ; %bb.0: ; %entry 5721; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5722; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5723; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 5724; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 5725; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5726; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 5727; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 5728; GFX10-WGP-NEXT: ; kill: def 
$vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5729; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 5730; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5731; GFX10-WGP-NEXT: s_endpgm 5732; 5733; GFX10-CU-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: 5734; GFX10-CU: ; %bb.0: ; %entry 5735; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5736; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5737; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 5738; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 5739; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5740; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 5741; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 5742; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5743; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 5744; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 5745; GFX10-CU-NEXT: s_endpgm 5746; 5747; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: 5748; SKIP-CACHE-INV: ; %bb.0: ; %entry 5749; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 5750; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 5751; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 5752; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 5753; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5754; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 5755; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 5756; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 5757; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 5758; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 5759; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 5760; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 5761; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 5762; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 5763; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 5764; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5765; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 5766; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 
5767; SKIP-CACHE-INV-NEXT: s_endpgm 5768; 5769; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: 5770; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5771; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5772; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5773; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5774; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5775; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5776; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5777; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5778; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5779; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5780; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5781; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5782; 5783; GFX90A-TGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: 5784; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5785; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5786; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5787; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5788; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5789; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5790; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5791; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 5792; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5793; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5794; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 5795; GFX90A-TGSPLIT-NEXT: s_endpgm 5796; 5797; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: 5798; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5799; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5800; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5801; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5802; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5803; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5804; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 
5805; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5806; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5807; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5808; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5809; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5810; 5811; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: 5812; GFX940-TGSPLIT: ; %bb.0: ; %entry 5813; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 5814; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5815; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5816; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5817; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5818; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5819; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 5820; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5821; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 5822; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 5823; GFX940-TGSPLIT-NEXT: s_endpgm 5824; 5825; GFX11-WGP-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: 5826; GFX11-WGP: ; %bb.0: ; %entry 5827; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 5828; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5829; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5830; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5831; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5832; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 5833; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 5834; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5835; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 5836; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5837; GFX11-WGP-NEXT: s_endpgm 5838; 5839; GFX11-CU-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: 5840; GFX11-CU: ; %bb.0: ; %entry 5841; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 5842; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5843; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5844; GFX11-CU-NEXT: s_load_b32 s2, 
s[4:5], 0xc 5845; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5846; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 5847; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 5848; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5849; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 5850; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5851; GFX11-CU-NEXT: s_endpgm 5852; 5853; GFX12-WGP-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: 5854; GFX12-WGP: ; %bb.0: ; %entry 5855; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 5856; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5857; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5858; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5859; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5860; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 5861; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 5862; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5863; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 5864; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5865; GFX12-WGP-NEXT: s_endpgm 5866; 5867; GFX12-CU-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: 5868; GFX12-CU: ; %bb.0: ; %entry 5869; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 5870; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5871; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5872; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5873; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5874; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 5875; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 5876; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5877; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 5878; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 5879; GFX12-CU-NEXT: s_endpgm 5880 ptr addrspace(1) %out, i32 %in, i32 %old) { 5881entry: 5882 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 5883 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst 5884 ret void 5885} 5886 5887define amdgpu_kernel void 
@global_wavefront_monotonic_monotonic_ret_cmpxchg( 5888; GFX6-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: 5889; GFX6: ; %bb.0: ; %entry 5890; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 5891; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5892; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 5893; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 5894; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5895; GFX6-NEXT: s_mov_b32 s12, s5 5896; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 5897; GFX6-NEXT: s_mov_b32 s10, 0x100f000 5898; GFX6-NEXT: s_mov_b32 s11, -1 5899; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 5900; GFX6-NEXT: s_mov_b32 s5, s12 5901; GFX6-NEXT: s_mov_b32 s6, s11 5902; GFX6-NEXT: s_mov_b32 s7, s10 5903; GFX6-NEXT: v_mov_b32_e32 v0, s9 5904; GFX6-NEXT: v_mov_b32_e32 v2, s8 5905; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5906; GFX6-NEXT: v_mov_b32_e32 v1, v2 5907; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 5908; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 5909; GFX6-NEXT: s_waitcnt vmcnt(0) 5910; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 5911; GFX6-NEXT: s_endpgm 5912; 5913; GFX7-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: 5914; GFX7: ; %bb.0: ; %entry 5915; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 5916; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 5917; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 5918; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 5919; GFX7-NEXT: s_mov_b64 s[12:13], 16 5920; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5921; GFX7-NEXT: s_mov_b32 s6, s4 5922; GFX7-NEXT: s_mov_b32 s7, s5 5923; GFX7-NEXT: s_mov_b32 s11, s12 5924; GFX7-NEXT: s_mov_b32 s10, s13 5925; GFX7-NEXT: s_add_u32 s6, s6, s11 5926; GFX7-NEXT: s_addc_u32 s10, s7, s10 5927; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 5928; GFX7-NEXT: s_mov_b32 s7, s10 5929; GFX7-NEXT: v_mov_b32_e32 v2, s9 5930; GFX7-NEXT: v_mov_b32_e32 v0, s8 5931; GFX7-NEXT: ; kill: 
def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5932; GFX7-NEXT: v_mov_b32_e32 v3, v0 5933; GFX7-NEXT: v_mov_b32_e32 v0, s6 5934; GFX7-NEXT: v_mov_b32_e32 v1, s7 5935; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 5936; GFX7-NEXT: v_mov_b32_e32 v0, s4 5937; GFX7-NEXT: v_mov_b32_e32 v1, s5 5938; GFX7-NEXT: s_waitcnt vmcnt(0) 5939; GFX7-NEXT: flat_store_dword v[0:1], v2 5940; GFX7-NEXT: s_endpgm 5941; 5942; GFX10-WGP-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: 5943; GFX10-WGP: ; %bb.0: ; %entry 5944; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 5945; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5946; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 5947; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 5948; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5949; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 5950; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 5951; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5952; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 5953; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 5954; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5955; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 5956; GFX10-WGP-NEXT: s_endpgm 5957; 5958; GFX10-CU-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: 5959; GFX10-CU: ; %bb.0: ; %entry 5960; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 5961; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5962; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 5963; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 5964; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5965; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 5966; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 5967; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 5968; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 5969; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 5970; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5971; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 5972; GFX10-CU-NEXT: s_endpgm 5973; 5974; SKIP-CACHE-INV-LABEL: 
global_wavefront_monotonic_monotonic_ret_cmpxchg: 5975; SKIP-CACHE-INV: ; %bb.0: ; %entry 5976; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 5977; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 5978; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 5979; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 5980; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5981; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 5982; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 5983; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 5984; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 5985; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 5986; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 5987; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 5988; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 5989; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 5990; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 5991; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 5992; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 5993; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 5994; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 5995; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5996; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 5997; SKIP-CACHE-INV-NEXT: s_endpgm 5998; 5999; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: 6000; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6001; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6002; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6003; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6004; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6005; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6006; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6007; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6008; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6009; GFX90A-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v3, v1 6010; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6011; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6012; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6013; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6014; 6015; GFX90A-TGSPLIT-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: 6016; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6017; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6018; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6019; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6020; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6021; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6022; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6023; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6024; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6025; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6026; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6027; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6028; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6029; GFX90A-TGSPLIT-NEXT: s_endpgm 6030; 6031; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: 6032; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6033; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6034; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6035; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6036; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6037; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6038; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6039; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6040; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6041; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6042; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 6043; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6044; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6045; 
GFX940-NOTTGSPLIT-NEXT: s_endpgm 6046; 6047; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: 6048; GFX940-TGSPLIT: ; %bb.0: ; %entry 6049; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6050; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6051; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6052; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6053; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6054; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6055; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6056; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6057; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6058; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 6059; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6060; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6061; GFX940-TGSPLIT-NEXT: s_endpgm 6062; 6063; GFX11-WGP-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: 6064; GFX11-WGP: ; %bb.0: ; %entry 6065; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 6066; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6067; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6068; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6069; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6070; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 6071; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 6072; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6073; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 6074; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6075; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 6076; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6077; GFX11-WGP-NEXT: s_endpgm 6078; 6079; GFX11-CU-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: 6080; GFX11-CU: ; %bb.0: ; %entry 6081; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 6082; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6083; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6084; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6085; GFX11-CU-NEXT: 
s_waitcnt lgkmcnt(0) 6086; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 6087; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 6088; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6089; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 6090; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6091; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 6092; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6093; GFX11-CU-NEXT: s_endpgm 6094; 6095; GFX12-WGP-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: 6096; GFX12-WGP: ; %bb.0: ; %entry 6097; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 6098; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6099; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6100; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6101; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6102; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 6103; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 6104; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6105; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 6106; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 6107; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 6108; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6109; GFX12-WGP-NEXT: s_endpgm 6110; 6111; GFX12-CU-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: 6112; GFX12-CU: ; %bb.0: ; %entry 6113; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 6114; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6115; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6116; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6117; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6118; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 6119; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 6120; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6121; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 6122; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 6123; GFX12-CU-NEXT: s_wait_loadcnt 0x0 6124; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6125; GFX12-CU-NEXT: s_endpgm 
6126 ptr addrspace(1) %out, i32 %in, i32 %old) { 6127entry: 6128 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 6129 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic 6130 %val0 = extractvalue { i32, i1 } %val, 0 6131 store i32 %val0, ptr addrspace(1) %out, align 4 6132 ret void 6133} 6134 6135define amdgpu_kernel void @global_wavefront_acquire_monotonic_ret_cmpxchg( 6136; GFX6-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: 6137; GFX6: ; %bb.0: ; %entry 6138; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 6139; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6140; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 6141; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 6142; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6143; GFX6-NEXT: s_mov_b32 s12, s5 6144; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 6145; GFX6-NEXT: s_mov_b32 s10, 0x100f000 6146; GFX6-NEXT: s_mov_b32 s11, -1 6147; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 6148; GFX6-NEXT: s_mov_b32 s5, s12 6149; GFX6-NEXT: s_mov_b32 s6, s11 6150; GFX6-NEXT: s_mov_b32 s7, s10 6151; GFX6-NEXT: v_mov_b32_e32 v0, s9 6152; GFX6-NEXT: v_mov_b32_e32 v2, s8 6153; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6154; GFX6-NEXT: v_mov_b32_e32 v1, v2 6155; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6156; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6157; GFX6-NEXT: s_waitcnt vmcnt(0) 6158; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 6159; GFX6-NEXT: s_endpgm 6160; 6161; GFX7-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: 6162; GFX7: ; %bb.0: ; %entry 6163; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 6164; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6165; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 6166; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 6167; GFX7-NEXT: s_mov_b64 s[12:13], 16 6168; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6169; GFX7-NEXT: s_mov_b32 s6, s4 6170; 
GFX7-NEXT: s_mov_b32 s7, s5 6171; GFX7-NEXT: s_mov_b32 s11, s12 6172; GFX7-NEXT: s_mov_b32 s10, s13 6173; GFX7-NEXT: s_add_u32 s6, s6, s11 6174; GFX7-NEXT: s_addc_u32 s10, s7, s10 6175; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 6176; GFX7-NEXT: s_mov_b32 s7, s10 6177; GFX7-NEXT: v_mov_b32_e32 v2, s9 6178; GFX7-NEXT: v_mov_b32_e32 v0, s8 6179; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6180; GFX7-NEXT: v_mov_b32_e32 v3, v0 6181; GFX7-NEXT: v_mov_b32_e32 v0, s6 6182; GFX7-NEXT: v_mov_b32_e32 v1, s7 6183; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6184; GFX7-NEXT: v_mov_b32_e32 v0, s4 6185; GFX7-NEXT: v_mov_b32_e32 v1, s5 6186; GFX7-NEXT: s_waitcnt vmcnt(0) 6187; GFX7-NEXT: flat_store_dword v[0:1], v2 6188; GFX7-NEXT: s_endpgm 6189; 6190; GFX10-WGP-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: 6191; GFX10-WGP: ; %bb.0: ; %entry 6192; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 6193; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6194; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 6195; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 6196; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6197; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 6198; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 6199; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6200; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 6201; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6202; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6203; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 6204; GFX10-WGP-NEXT: s_endpgm 6205; 6206; GFX10-CU-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: 6207; GFX10-CU: ; %bb.0: ; %entry 6208; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 6209; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6210; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 6211; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 6212; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6213; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 6214; GFX10-CU-NEXT: v_mov_b32_e32 
v3, s6 6215; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6216; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 6217; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6218; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6219; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 6220; GFX10-CU-NEXT: s_endpgm 6221; 6222; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: 6223; SKIP-CACHE-INV: ; %bb.0: ; %entry 6224; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 6225; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 6226; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 6227; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 6228; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6229; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 6230; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 6231; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 6232; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 6233; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 6234; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 6235; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 6236; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 6237; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 6238; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 6239; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6240; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 6241; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6242; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6243; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6244; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 6245; SKIP-CACHE-INV-NEXT: s_endpgm 6246; 6247; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: 6248; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6249; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6250; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6251; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6252; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6253; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6254; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6255; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6256; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6257; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6258; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6259; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6260; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6261; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6262; 6263; GFX90A-TGSPLIT-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: 6264; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6265; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6266; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6267; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6268; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6269; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6270; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6271; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6272; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6273; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6274; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6275; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6276; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6277; GFX90A-TGSPLIT-NEXT: s_endpgm 6278; 6279; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: 6280; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6281; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6282; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6283; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6284; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6285; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6286; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6287; GFX940-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v1, s2 6288; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6289; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6290; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 6291; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6292; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6293; GFX940-NOTTGSPLIT-NEXT: s_endpgm 6294; 6295; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: 6296; GFX940-TGSPLIT: ; %bb.0: ; %entry 6297; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6298; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6299; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6300; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6301; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6302; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6303; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6304; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6305; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6306; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 6307; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6308; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6309; GFX940-TGSPLIT-NEXT: s_endpgm 6310; 6311; GFX11-WGP-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: 6312; GFX11-WGP: ; %bb.0: ; %entry 6313; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 6314; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6315; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6316; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6317; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6318; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 6319; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 6320; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6321; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 6322; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6323; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 6324; GFX11-WGP-NEXT: 
global_store_b32 v0, v1, s[0:1] 6325; GFX11-WGP-NEXT: s_endpgm 6326; 6327; GFX11-CU-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: 6328; GFX11-CU: ; %bb.0: ; %entry 6329; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 6330; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6331; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6332; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6333; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6334; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 6335; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 6336; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6337; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 6338; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6339; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 6340; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6341; GFX11-CU-NEXT: s_endpgm 6342; 6343; GFX12-WGP-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: 6344; GFX12-WGP: ; %bb.0: ; %entry 6345; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 6346; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6347; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6348; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6349; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6350; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 6351; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 6352; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6353; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 6354; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 6355; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 6356; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6357; GFX12-WGP-NEXT: s_endpgm 6358; 6359; GFX12-CU-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: 6360; GFX12-CU: ; %bb.0: ; %entry 6361; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 6362; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6363; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6364; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6365; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6366; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 6367; 
GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 6368; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6369; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 6370; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 6371; GFX12-CU-NEXT: s_wait_loadcnt 0x0 6372; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6373; GFX12-CU-NEXT: s_endpgm 6374 ptr addrspace(1) %out, i32 %in, i32 %old) { 6375entry: 6376 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 6377 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic 6378 %val0 = extractvalue { i32, i1 } %val, 0 6379 store i32 %val0, ptr addrspace(1) %out, align 4 6380 ret void 6381} 6382 6383define amdgpu_kernel void @global_wavefront_release_monotonic_ret_cmpxchg( 6384; GFX6-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: 6385; GFX6: ; %bb.0: ; %entry 6386; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 6387; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6388; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 6389; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 6390; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6391; GFX6-NEXT: s_mov_b32 s12, s5 6392; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 6393; GFX6-NEXT: s_mov_b32 s10, 0x100f000 6394; GFX6-NEXT: s_mov_b32 s11, -1 6395; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 6396; GFX6-NEXT: s_mov_b32 s5, s12 6397; GFX6-NEXT: s_mov_b32 s6, s11 6398; GFX6-NEXT: s_mov_b32 s7, s10 6399; GFX6-NEXT: v_mov_b32_e32 v0, s9 6400; GFX6-NEXT: v_mov_b32_e32 v2, s8 6401; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6402; GFX6-NEXT: v_mov_b32_e32 v1, v2 6403; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6404; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6405; GFX6-NEXT: s_waitcnt vmcnt(0) 6406; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 6407; GFX6-NEXT: s_endpgm 6408; 6409; GFX7-LABEL: 
global_wavefront_release_monotonic_ret_cmpxchg: 6410; GFX7: ; %bb.0: ; %entry 6411; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 6412; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6413; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 6414; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 6415; GFX7-NEXT: s_mov_b64 s[12:13], 16 6416; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6417; GFX7-NEXT: s_mov_b32 s6, s4 6418; GFX7-NEXT: s_mov_b32 s7, s5 6419; GFX7-NEXT: s_mov_b32 s11, s12 6420; GFX7-NEXT: s_mov_b32 s10, s13 6421; GFX7-NEXT: s_add_u32 s6, s6, s11 6422; GFX7-NEXT: s_addc_u32 s10, s7, s10 6423; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 6424; GFX7-NEXT: s_mov_b32 s7, s10 6425; GFX7-NEXT: v_mov_b32_e32 v2, s9 6426; GFX7-NEXT: v_mov_b32_e32 v0, s8 6427; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6428; GFX7-NEXT: v_mov_b32_e32 v3, v0 6429; GFX7-NEXT: v_mov_b32_e32 v0, s6 6430; GFX7-NEXT: v_mov_b32_e32 v1, s7 6431; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6432; GFX7-NEXT: v_mov_b32_e32 v0, s4 6433; GFX7-NEXT: v_mov_b32_e32 v1, s5 6434; GFX7-NEXT: s_waitcnt vmcnt(0) 6435; GFX7-NEXT: flat_store_dword v[0:1], v2 6436; GFX7-NEXT: s_endpgm 6437; 6438; GFX10-WGP-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: 6439; GFX10-WGP: ; %bb.0: ; %entry 6440; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 6441; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6442; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 6443; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 6444; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6445; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 6446; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 6447; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6448; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 6449; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6450; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6451; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 6452; GFX10-WGP-NEXT: s_endpgm 6453; 6454; GFX10-CU-LABEL: 
global_wavefront_release_monotonic_ret_cmpxchg: 6455; GFX10-CU: ; %bb.0: ; %entry 6456; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 6457; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6458; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 6459; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 6460; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6461; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 6462; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 6463; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6464; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 6465; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6466; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6467; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 6468; GFX10-CU-NEXT: s_endpgm 6469; 6470; SKIP-CACHE-INV-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: 6471; SKIP-CACHE-INV: ; %bb.0: ; %entry 6472; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 6473; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 6474; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 6475; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 6476; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6477; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 6478; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 6479; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 6480; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 6481; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 6482; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 6483; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 6484; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 6485; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 6486; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 6487; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6488; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 6489; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6490; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6491; 
SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6492; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 6493; SKIP-CACHE-INV-NEXT: s_endpgm 6494; 6495; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: 6496; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6497; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6498; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6499; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6500; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6501; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6502; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6503; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6504; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6505; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6506; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6507; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6508; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6509; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6510; 6511; GFX90A-TGSPLIT-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: 6512; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6513; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6514; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6515; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6516; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6517; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6518; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6519; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6520; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6521; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6522; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6523; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6524; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6525; GFX90A-TGSPLIT-NEXT: s_endpgm 6526; 6527; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: 6528; 
GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6529; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6530; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6531; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6532; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6533; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6534; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6535; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6536; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6537; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6538; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 6539; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6540; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6541; GFX940-NOTTGSPLIT-NEXT: s_endpgm 6542; 6543; GFX940-TGSPLIT-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: 6544; GFX940-TGSPLIT: ; %bb.0: ; %entry 6545; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6546; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6547; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6548; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6549; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6550; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6551; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6552; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6553; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6554; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 6555; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6556; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6557; GFX940-TGSPLIT-NEXT: s_endpgm 6558; 6559; GFX11-WGP-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: 6560; GFX11-WGP: ; %bb.0: ; %entry 6561; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 6562; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6563; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6564; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6565; 
GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6566; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 6567; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 6568; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6569; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 6570; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6571; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 6572; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6573; GFX11-WGP-NEXT: s_endpgm 6574; 6575; GFX11-CU-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: 6576; GFX11-CU: ; %bb.0: ; %entry 6577; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 6578; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6579; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6580; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6581; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6582; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 6583; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 6584; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6585; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 6586; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6587; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 6588; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6589; GFX11-CU-NEXT: s_endpgm 6590; 6591; GFX12-WGP-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: 6592; GFX12-WGP: ; %bb.0: ; %entry 6593; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 6594; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6595; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6596; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6597; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6598; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 6599; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 6600; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6601; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 6602; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 6603; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 6604; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6605; GFX12-WGP-NEXT: 
s_endpgm 6606; 6607; GFX12-CU-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: 6608; GFX12-CU: ; %bb.0: ; %entry 6609; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 6610; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6611; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6612; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6613; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6614; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 6615; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 6616; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6617; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 6618; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 6619; GFX12-CU-NEXT: s_wait_loadcnt 0x0 6620; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6621; GFX12-CU-NEXT: s_endpgm 6622 ptr addrspace(1) %out, i32 %in, i32 %old) { 6623entry: 6624 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 6625 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic 6626 %val0 = extractvalue { i32, i1 } %val, 0 6627 store i32 %val0, ptr addrspace(1) %out, align 4 6628 ret void 6629} 6630 6631define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_ret_cmpxchg( 6632; GFX6-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: 6633; GFX6: ; %bb.0: ; %entry 6634; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 6635; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6636; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 6637; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 6638; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6639; GFX6-NEXT: s_mov_b32 s12, s5 6640; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 6641; GFX6-NEXT: s_mov_b32 s10, 0x100f000 6642; GFX6-NEXT: s_mov_b32 s11, -1 6643; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 6644; GFX6-NEXT: s_mov_b32 s5, s12 6645; GFX6-NEXT: s_mov_b32 s6, s11 6646; GFX6-NEXT: s_mov_b32 s7, s10 6647; GFX6-NEXT: v_mov_b32_e32 v0, s9 6648; GFX6-NEXT: v_mov_b32_e32 v2, s8 6649; GFX6-NEXT: ; kill: 
def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6650; GFX6-NEXT: v_mov_b32_e32 v1, v2 6651; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6652; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6653; GFX6-NEXT: s_waitcnt vmcnt(0) 6654; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 6655; GFX6-NEXT: s_endpgm 6656; 6657; GFX7-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: 6658; GFX7: ; %bb.0: ; %entry 6659; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 6660; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6661; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 6662; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 6663; GFX7-NEXT: s_mov_b64 s[12:13], 16 6664; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6665; GFX7-NEXT: s_mov_b32 s6, s4 6666; GFX7-NEXT: s_mov_b32 s7, s5 6667; GFX7-NEXT: s_mov_b32 s11, s12 6668; GFX7-NEXT: s_mov_b32 s10, s13 6669; GFX7-NEXT: s_add_u32 s6, s6, s11 6670; GFX7-NEXT: s_addc_u32 s10, s7, s10 6671; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 6672; GFX7-NEXT: s_mov_b32 s7, s10 6673; GFX7-NEXT: v_mov_b32_e32 v2, s9 6674; GFX7-NEXT: v_mov_b32_e32 v0, s8 6675; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6676; GFX7-NEXT: v_mov_b32_e32 v3, v0 6677; GFX7-NEXT: v_mov_b32_e32 v0, s6 6678; GFX7-NEXT: v_mov_b32_e32 v1, s7 6679; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6680; GFX7-NEXT: v_mov_b32_e32 v0, s4 6681; GFX7-NEXT: v_mov_b32_e32 v1, s5 6682; GFX7-NEXT: s_waitcnt vmcnt(0) 6683; GFX7-NEXT: flat_store_dword v[0:1], v2 6684; GFX7-NEXT: s_endpgm 6685; 6686; GFX10-WGP-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: 6687; GFX10-WGP: ; %bb.0: ; %entry 6688; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 6689; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6690; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 6691; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 6692; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6693; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 6694; GFX10-WGP-NEXT: 
v_mov_b32_e32 v3, s6 6695; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6696; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 6697; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6698; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6699; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 6700; GFX10-WGP-NEXT: s_endpgm 6701; 6702; GFX10-CU-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: 6703; GFX10-CU: ; %bb.0: ; %entry 6704; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 6705; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6706; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 6707; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 6708; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6709; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 6710; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 6711; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6712; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 6713; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6714; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6715; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 6716; GFX10-CU-NEXT: s_endpgm 6717; 6718; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: 6719; SKIP-CACHE-INV: ; %bb.0: ; %entry 6720; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 6721; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 6722; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 6723; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 6724; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6725; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 6726; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 6727; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 6728; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 6729; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 6730; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 6731; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 6732; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 6733; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, 
s5 6734; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 6735; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6736; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 6737; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6738; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6739; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6740; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 6741; SKIP-CACHE-INV-NEXT: s_endpgm 6742; 6743; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: 6744; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6745; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6746; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6747; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6748; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6749; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6750; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6751; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6752; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6753; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6754; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6755; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6756; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6757; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6758; 6759; GFX90A-TGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: 6760; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6761; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6762; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6763; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6764; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6765; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6766; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6767; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 6768; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6769; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6770; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 6771; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6772; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 6773; GFX90A-TGSPLIT-NEXT: s_endpgm 6774; 6775; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: 6776; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6777; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6778; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6779; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6780; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6781; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6782; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6783; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6784; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6785; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6786; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 6787; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 6788; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 6789; GFX940-NOTTGSPLIT-NEXT: s_endpgm 6790; 6791; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: 6792; GFX940-TGSPLIT: ; %bb.0: ; %entry 6793; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6794; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6795; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6796; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6797; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6798; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6799; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 6800; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6801; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 6802; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 6803; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6804; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 
sc1 6805; GFX940-TGSPLIT-NEXT: s_endpgm 6806; 6807; GFX11-WGP-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: 6808; GFX11-WGP: ; %bb.0: ; %entry 6809; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 6810; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6811; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6812; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6813; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6814; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 6815; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 6816; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6817; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 6818; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6819; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 6820; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6821; GFX11-WGP-NEXT: s_endpgm 6822; 6823; GFX11-CU-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: 6824; GFX11-CU: ; %bb.0: ; %entry 6825; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 6826; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6827; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6828; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6829; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6830; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 6831; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 6832; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6833; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 6834; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 6835; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 6836; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6837; GFX11-CU-NEXT: s_endpgm 6838; 6839; GFX12-WGP-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: 6840; GFX12-WGP: ; %bb.0: ; %entry 6841; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 6842; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6843; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6844; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6845; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6846; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 6847; GFX12-WGP-NEXT: v_mov_b32_e32 v3, 
s2 6848; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6849; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 6850; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 6851; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 6852; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 6853; GFX12-WGP-NEXT: s_endpgm 6854; 6855; GFX12-CU-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: 6856; GFX12-CU: ; %bb.0: ; %entry 6857; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 6858; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6859; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6860; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6861; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6862; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 6863; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 6864; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6865; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 6866; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 6867; GFX12-CU-NEXT: s_wait_loadcnt 0x0 6868; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 6869; GFX12-CU-NEXT: s_endpgm 6870 ptr addrspace(1) %out, i32 %in, i32 %old) { 6871entry: 6872 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 6873 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic 6874 %val0 = extractvalue { i32, i1 } %val, 0 6875 store i32 %val0, ptr addrspace(1) %out, align 4 6876 ret void 6877} 6878 6879define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_ret_cmpxchg( 6880; GFX6-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: 6881; GFX6: ; %bb.0: ; %entry 6882; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 6883; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6884; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 6885; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 6886; GFX6-NEXT: s_waitcnt lgkmcnt(0) 6887; GFX6-NEXT: s_mov_b32 s12, s5 6888; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 6889; 
GFX6-NEXT: s_mov_b32 s10, 0x100f000 6890; GFX6-NEXT: s_mov_b32 s11, -1 6891; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 6892; GFX6-NEXT: s_mov_b32 s5, s12 6893; GFX6-NEXT: s_mov_b32 s6, s11 6894; GFX6-NEXT: s_mov_b32 s7, s10 6895; GFX6-NEXT: v_mov_b32_e32 v0, s9 6896; GFX6-NEXT: v_mov_b32_e32 v2, s8 6897; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6898; GFX6-NEXT: v_mov_b32_e32 v1, v2 6899; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 6900; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6901; GFX6-NEXT: s_waitcnt vmcnt(0) 6902; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 6903; GFX6-NEXT: s_endpgm 6904; 6905; GFX7-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: 6906; GFX7: ; %bb.0: ; %entry 6907; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 6908; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6909; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 6910; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 6911; GFX7-NEXT: s_mov_b64 s[12:13], 16 6912; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6913; GFX7-NEXT: s_mov_b32 s6, s4 6914; GFX7-NEXT: s_mov_b32 s7, s5 6915; GFX7-NEXT: s_mov_b32 s11, s12 6916; GFX7-NEXT: s_mov_b32 s10, s13 6917; GFX7-NEXT: s_add_u32 s6, s6, s11 6918; GFX7-NEXT: s_addc_u32 s10, s7, s10 6919; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 6920; GFX7-NEXT: s_mov_b32 s7, s10 6921; GFX7-NEXT: v_mov_b32_e32 v2, s9 6922; GFX7-NEXT: v_mov_b32_e32 v0, s8 6923; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6924; GFX7-NEXT: v_mov_b32_e32 v3, v0 6925; GFX7-NEXT: v_mov_b32_e32 v0, s6 6926; GFX7-NEXT: v_mov_b32_e32 v1, s7 6927; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6928; GFX7-NEXT: v_mov_b32_e32 v0, s4 6929; GFX7-NEXT: v_mov_b32_e32 v1, s5 6930; GFX7-NEXT: s_waitcnt vmcnt(0) 6931; GFX7-NEXT: flat_store_dword v[0:1], v2 6932; GFX7-NEXT: s_endpgm 6933; 6934; GFX10-WGP-LABEL: 
global_wavefront_seq_cst_monotonic_ret_cmpxchg: 6935; GFX10-WGP: ; %bb.0: ; %entry 6936; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 6937; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6938; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 6939; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 6940; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6941; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 6942; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 6943; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6944; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 6945; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6946; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 6947; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 6948; GFX10-WGP-NEXT: s_endpgm 6949; 6950; GFX10-CU-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: 6951; GFX10-CU: ; %bb.0: ; %entry 6952; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 6953; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6954; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 6955; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 6956; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6957; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 6958; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 6959; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 6960; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 6961; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 6962; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 6963; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 6964; GFX10-CU-NEXT: s_endpgm 6965; 6966; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: 6967; SKIP-CACHE-INV: ; %bb.0: ; %entry 6968; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 6969; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 6970; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 6971; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 6972; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6973; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 6974; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 
killed $sgpr0 killed $sgpr0_sgpr1 6975; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 6976; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 6977; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 6978; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 6979; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 6980; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 6981; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 6982; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 6983; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 6984; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 6985; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 6986; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 6987; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 6988; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 6989; SKIP-CACHE-INV-NEXT: s_endpgm 6990; 6991; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: 6992; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6993; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 6994; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6995; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6996; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6997; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6998; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6999; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7000; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7001; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7002; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7003; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7004; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7005; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7006; 7007; GFX90A-TGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: 7008; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7009; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7010; 
GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7011; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7012; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7013; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7014; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7015; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7016; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7017; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7018; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7019; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7020; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7021; GFX90A-TGSPLIT-NEXT: s_endpgm 7022; 7023; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: 7024; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7025; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7026; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7027; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7028; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7029; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7030; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7031; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7032; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7033; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7034; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7035; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7036; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7037; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7038; 7039; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: 7040; GFX940-TGSPLIT: ; %bb.0: ; %entry 7041; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7042; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7043; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7044; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7045; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7046; 
GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7047; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7048; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7049; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7050; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7051; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7052; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7053; GFX940-TGSPLIT-NEXT: s_endpgm 7054; 7055; GFX11-WGP-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: 7056; GFX11-WGP: ; %bb.0: ; %entry 7057; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 7058; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7059; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7060; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7061; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7062; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 7063; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 7064; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7065; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 7066; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7067; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 7068; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7069; GFX11-WGP-NEXT: s_endpgm 7070; 7071; GFX11-CU-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: 7072; GFX11-CU: ; %bb.0: ; %entry 7073; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 7074; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7075; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7076; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7077; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7078; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 7079; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 7080; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7081; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 7082; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7083; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 7084; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7085; GFX11-CU-NEXT: s_endpgm 
7086; 7087; GFX12-WGP-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: 7088; GFX12-WGP: ; %bb.0: ; %entry 7089; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 7090; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7091; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7092; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7093; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7094; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 7095; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 7096; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7097; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 7098; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 7099; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 7100; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7101; GFX12-WGP-NEXT: s_endpgm 7102; 7103; GFX12-CU-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: 7104; GFX12-CU: ; %bb.0: ; %entry 7105; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 7106; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7107; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7108; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7109; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7110; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 7111; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 7112; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7113; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 7114; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 7115; GFX12-CU-NEXT: s_wait_loadcnt 0x0 7116; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7117; GFX12-CU-NEXT: s_endpgm 7118 ptr addrspace(1) %out, i32 %in, i32 %old) { 7119entry: 7120 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 7121 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic 7122 %val0 = extractvalue { i32, i1 } %val, 0 7123 store i32 %val0, ptr addrspace(1) %out, align 4 7124 ret void 7125} 7126 7127define amdgpu_kernel void @global_wavefront_monotonic_acquire_ret_cmpxchg( 7128; 
GFX6-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: 7129; GFX6: ; %bb.0: ; %entry 7130; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 7131; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7132; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 7133; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 7134; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7135; GFX6-NEXT: s_mov_b32 s12, s5 7136; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 7137; GFX6-NEXT: s_mov_b32 s10, 0x100f000 7138; GFX6-NEXT: s_mov_b32 s11, -1 7139; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 7140; GFX6-NEXT: s_mov_b32 s5, s12 7141; GFX6-NEXT: s_mov_b32 s6, s11 7142; GFX6-NEXT: s_mov_b32 s7, s10 7143; GFX6-NEXT: v_mov_b32_e32 v0, s9 7144; GFX6-NEXT: v_mov_b32_e32 v2, s8 7145; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7146; GFX6-NEXT: v_mov_b32_e32 v1, v2 7147; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 7148; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7149; GFX6-NEXT: s_waitcnt vmcnt(0) 7150; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 7151; GFX6-NEXT: s_endpgm 7152; 7153; GFX7-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: 7154; GFX7: ; %bb.0: ; %entry 7155; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 7156; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7157; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7158; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7159; GFX7-NEXT: s_mov_b64 s[12:13], 16 7160; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7161; GFX7-NEXT: s_mov_b32 s6, s4 7162; GFX7-NEXT: s_mov_b32 s7, s5 7163; GFX7-NEXT: s_mov_b32 s11, s12 7164; GFX7-NEXT: s_mov_b32 s10, s13 7165; GFX7-NEXT: s_add_u32 s6, s6, s11 7166; GFX7-NEXT: s_addc_u32 s10, s7, s10 7167; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7168; GFX7-NEXT: s_mov_b32 s7, s10 7169; GFX7-NEXT: v_mov_b32_e32 v2, s9 7170; GFX7-NEXT: v_mov_b32_e32 v0, s8 7171; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7172; 
GFX7-NEXT: v_mov_b32_e32 v3, v0 7173; GFX7-NEXT: v_mov_b32_e32 v0, s6 7174; GFX7-NEXT: v_mov_b32_e32 v1, s7 7175; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7176; GFX7-NEXT: v_mov_b32_e32 v0, s4 7177; GFX7-NEXT: v_mov_b32_e32 v1, s5 7178; GFX7-NEXT: s_waitcnt vmcnt(0) 7179; GFX7-NEXT: flat_store_dword v[0:1], v2 7180; GFX7-NEXT: s_endpgm 7181; 7182; GFX10-WGP-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: 7183; GFX10-WGP: ; %bb.0: ; %entry 7184; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 7185; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7186; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 7187; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 7188; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7189; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7190; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 7191; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7192; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 7193; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7194; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 7195; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 7196; GFX10-WGP-NEXT: s_endpgm 7197; 7198; GFX10-CU-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: 7199; GFX10-CU: ; %bb.0: ; %entry 7200; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 7201; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7202; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 7203; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 7204; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 7205; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7206; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 7207; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7208; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 7209; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7210; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 7211; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 7212; GFX10-CU-NEXT: s_endpgm 7213; 7214; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: 7215; SKIP-CACHE-INV: ; 
%bb.0: ; %entry 7216; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7217; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7218; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7219; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7220; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7221; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 7222; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 7223; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 7224; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 7225; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 7226; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 7227; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 7228; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7229; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 7230; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 7231; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7232; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 7233; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 7234; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7235; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7236; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 7237; SKIP-CACHE-INV-NEXT: s_endpgm 7238; 7239; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: 7240; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7241; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7242; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7243; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7244; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7245; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7246; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7247; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7248; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7249; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7250; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap 
v1, v0, v[2:3], s[4:5] offset:16 glc 7251; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7252; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7253; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7254; 7255; GFX90A-TGSPLIT-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: 7256; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7257; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7258; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7259; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7260; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7261; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7262; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7263; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7264; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7265; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7266; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7267; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7268; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7269; GFX90A-TGSPLIT-NEXT: s_endpgm 7270; 7271; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: 7272; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7273; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7274; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7275; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7276; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7277; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7278; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7279; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7280; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7281; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7282; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7283; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7284; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7285; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7286; 7287; GFX940-TGSPLIT-LABEL: 
global_wavefront_monotonic_acquire_ret_cmpxchg: 7288; GFX940-TGSPLIT: ; %bb.0: ; %entry 7289; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7290; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7291; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7292; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7293; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7294; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7295; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7296; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7297; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7298; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7299; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7300; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7301; GFX940-TGSPLIT-NEXT: s_endpgm 7302; 7303; GFX11-WGP-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: 7304; GFX11-WGP: ; %bb.0: ; %entry 7305; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 7306; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7307; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7308; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7309; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7310; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 7311; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 7312; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7313; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 7314; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7315; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 7316; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7317; GFX11-WGP-NEXT: s_endpgm 7318; 7319; GFX11-CU-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: 7320; GFX11-CU: ; %bb.0: ; %entry 7321; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 7322; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7323; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7324; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7325; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7326; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 7327; 
GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 7328; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7329; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 7330; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7331; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 7332; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7333; GFX11-CU-NEXT: s_endpgm 7334; 7335; GFX12-WGP-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: 7336; GFX12-WGP: ; %bb.0: ; %entry 7337; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 7338; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7339; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7340; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7341; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7342; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 7343; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 7344; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7345; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 7346; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 7347; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 7348; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7349; GFX12-WGP-NEXT: s_endpgm 7350; 7351; GFX12-CU-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: 7352; GFX12-CU: ; %bb.0: ; %entry 7353; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 7354; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7355; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7356; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7357; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7358; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 7359; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 7360; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7361; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 7362; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 7363; GFX12-CU-NEXT: s_wait_loadcnt 0x0 7364; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7365; GFX12-CU-NEXT: s_endpgm 7366 ptr addrspace(1) %out, i32 %in, i32 %old) { 7367entry: 7368 %gep = 
getelementptr i32, ptr addrspace(1) %out, i32 4 7369 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire 7370 %val0 = extractvalue { i32, i1 } %val, 0 7371 store i32 %val0, ptr addrspace(1) %out, align 4 7372 ret void 7373} 7374 7375define amdgpu_kernel void @global_wavefront_acquire_acquire_ret_cmpxchg( 7376
; This kernel issues a volatile cmpxchg on global (addrspace(1)) memory at
; syncscope("wavefront") with acquire ordering on both success and failure.
; The location compared is %out advanced by four i32 elements (%old is the
; expected value, %in the replacement), and the value loaded by the cmpxchg
; (field 0 of its result) is then stored to %out.
; The autogenerated per-target assertions below each expect one cmpswap
; instruction, a wait on its result, and then the store.
; GFX6-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: 7377; GFX6: ; %bb.0: ; %entry 7378; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 7379; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7380; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 7381; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 7382; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7383; GFX6-NEXT: s_mov_b32 s12, s5 7384; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 7385; GFX6-NEXT: s_mov_b32 s10, 0x100f000 7386; GFX6-NEXT: s_mov_b32 s11, -1 7387; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 7388; GFX6-NEXT: s_mov_b32 s5, s12 7389; GFX6-NEXT: s_mov_b32 s6, s11 7390; GFX6-NEXT: s_mov_b32 s7, s10 7391; GFX6-NEXT: v_mov_b32_e32 v0, s9 7392; GFX6-NEXT: v_mov_b32_e32 v2, s8 7393; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7394; GFX6-NEXT: v_mov_b32_e32 v1, v2 7395; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 7396; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7397; GFX6-NEXT: s_waitcnt vmcnt(0) 7398; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 7399; GFX6-NEXT: s_endpgm 7400; 7401; GFX7-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: 7402; GFX7: ; %bb.0: ; %entry 7403; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 7404; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7405; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7406; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7407; GFX7-NEXT: s_mov_b64 s[12:13], 16 7408; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7409; GFX7-NEXT: s_mov_b32 s6, s4 7410; GFX7-NEXT: s_mov_b32 s7, s5 7411; GFX7-NEXT: s_mov_b32 s11, s12 7412; GFX7-NEXT: 
s_mov_b32 s10, s13 7413; GFX7-NEXT: s_add_u32 s6, s6, s11 7414; GFX7-NEXT: s_addc_u32 s10, s7, s10 7415; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7416; GFX7-NEXT: s_mov_b32 s7, s10 7417; GFX7-NEXT: v_mov_b32_e32 v2, s9 7418; GFX7-NEXT: v_mov_b32_e32 v0, s8 7419; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7420; GFX7-NEXT: v_mov_b32_e32 v3, v0 7421; GFX7-NEXT: v_mov_b32_e32 v0, s6 7422; GFX7-NEXT: v_mov_b32_e32 v1, s7 7423; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7424; GFX7-NEXT: v_mov_b32_e32 v0, s4 7425; GFX7-NEXT: v_mov_b32_e32 v1, s5 7426; GFX7-NEXT: s_waitcnt vmcnt(0) 7427; GFX7-NEXT: flat_store_dword v[0:1], v2 7428; GFX7-NEXT: s_endpgm 7429; 7430; GFX10-WGP-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: 7431; GFX10-WGP: ; %bb.0: ; %entry 7432; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 7433; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7434; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 7435; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 7436; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7437; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7438; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 7439; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7440; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 7441; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7442; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 7443; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 7444; GFX10-WGP-NEXT: s_endpgm 7445; 7446; GFX10-CU-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: 7447; GFX10-CU: ; %bb.0: ; %entry 7448; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 7449; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7450; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 7451; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 7452; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 7453; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7454; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 7455; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed 
$exec 7456; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 7457; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7458; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 7459; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 7460; GFX10-CU-NEXT: s_endpgm 7461; 7462; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: 7463; SKIP-CACHE-INV: ; %bb.0: ; %entry 7464; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7465; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7466; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7467; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7468; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7469; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 7470; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 7471; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 7472; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 7473; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 7474; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 7475; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 7476; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7477; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 7478; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 7479; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7480; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 7481; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 7482; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7483; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7484; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 7485; SKIP-CACHE-INV-NEXT: s_endpgm 7486; 7487; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: 7488; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7489; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7490; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7491; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7492; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 
0xc 7493; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7494; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7495; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7496; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7497; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7498; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7499; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7500; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7501; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7502; 7503; GFX90A-TGSPLIT-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: 7504; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7505; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7506; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7507; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7508; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7509; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7510; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7511; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7512; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7513; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7514; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7515; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7516; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7517; GFX90A-TGSPLIT-NEXT: s_endpgm 7518; 7519; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: 7520; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7521; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7522; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7523; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7524; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7525; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7526; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7527; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7528; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 
7529; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7530; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7531; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7532; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7533; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7534; 7535; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: 7536; GFX940-TGSPLIT: ; %bb.0: ; %entry 7537; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7538; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7539; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7540; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7541; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7542; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7543; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7544; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7545; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7546; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7547; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7548; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7549; GFX940-TGSPLIT-NEXT: s_endpgm 7550; 7551; GFX11-WGP-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: 7552; GFX11-WGP: ; %bb.0: ; %entry 7553; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 7554; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7555; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7556; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7557; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7558; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 7559; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 7560; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7561; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 7562; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7563; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 7564; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7565; GFX11-WGP-NEXT: s_endpgm 7566; 7567; GFX11-CU-LABEL: 
global_wavefront_acquire_acquire_ret_cmpxchg: 7568; GFX11-CU: ; %bb.0: ; %entry 7569; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 7570; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7571; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7572; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7573; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7574; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 7575; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 7576; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7577; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 7578; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7579; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 7580; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7581; GFX11-CU-NEXT: s_endpgm 7582; 7583; GFX12-WGP-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: 7584; GFX12-WGP: ; %bb.0: ; %entry 7585; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 7586; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7587; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7588; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7589; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7590; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 7591; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 7592; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7593; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 7594; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 7595; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 7596; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7597; GFX12-WGP-NEXT: s_endpgm 7598; 7599; GFX12-CU-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: 7600; GFX12-CU: ; %bb.0: ; %entry 7601; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 7602; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7603; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7604; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7605; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7606; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 7607; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 7608; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def 
$vgpr1_vgpr2 killed $exec 7609; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 7610; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 7611; GFX12-CU-NEXT: s_wait_loadcnt 0x0 7612; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7613; GFX12-CU-NEXT: s_endpgm 7614 ptr addrspace(1) %out, i32 %in, i32 %old) { 7615entry: 7616 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 7617 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire 7618 %val0 = extractvalue { i32, i1 } %val, 0 7619 store i32 %val0, ptr addrspace(1) %out, align 4 7620 ret void 7621} 7622 7623define amdgpu_kernel void @global_wavefront_release_acquire_ret_cmpxchg( 7624
; This kernel issues a volatile cmpxchg on global (addrspace(1)) memory at
; syncscope("wavefront") with release ordering on success and acquire ordering
; on failure. The location compared is %out advanced by four i32 elements
; (%old is the expected value, %in the replacement), and the value loaded by
; the cmpxchg (field 0 of its result) is then stored to %out.
; The autogenerated per-target assertions below each expect one cmpswap
; instruction, a wait on its result, and then the store.
; GFX6-LABEL: global_wavefront_release_acquire_ret_cmpxchg: 7625; GFX6: ; %bb.0: ; %entry 7626; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 7627; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7628; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 7629; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 7630; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7631; GFX6-NEXT: s_mov_b32 s12, s5 7632; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 7633; GFX6-NEXT: s_mov_b32 s10, 0x100f000 7634; GFX6-NEXT: s_mov_b32 s11, -1 7635; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 7636; GFX6-NEXT: s_mov_b32 s5, s12 7637; GFX6-NEXT: s_mov_b32 s6, s11 7638; GFX6-NEXT: s_mov_b32 s7, s10 7639; GFX6-NEXT: v_mov_b32_e32 v0, s9 7640; GFX6-NEXT: v_mov_b32_e32 v2, s8 7641; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7642; GFX6-NEXT: v_mov_b32_e32 v1, v2 7643; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 7644; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7645; GFX6-NEXT: s_waitcnt vmcnt(0) 7646; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 7647; GFX6-NEXT: s_endpgm 7648; 7649; GFX7-LABEL: global_wavefront_release_acquire_ret_cmpxchg: 7650; GFX7: ; %bb.0: ; %entry 7651; GFX7-NEXT: 
s_mov_b64 s[6:7], s[8:9] 7652; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7653; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7654; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7655; GFX7-NEXT: s_mov_b64 s[12:13], 16 7656; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7657; GFX7-NEXT: s_mov_b32 s6, s4 7658; GFX7-NEXT: s_mov_b32 s7, s5 7659; GFX7-NEXT: s_mov_b32 s11, s12 7660; GFX7-NEXT: s_mov_b32 s10, s13 7661; GFX7-NEXT: s_add_u32 s6, s6, s11 7662; GFX7-NEXT: s_addc_u32 s10, s7, s10 7663; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7664; GFX7-NEXT: s_mov_b32 s7, s10 7665; GFX7-NEXT: v_mov_b32_e32 v2, s9 7666; GFX7-NEXT: v_mov_b32_e32 v0, s8 7667; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7668; GFX7-NEXT: v_mov_b32_e32 v3, v0 7669; GFX7-NEXT: v_mov_b32_e32 v0, s6 7670; GFX7-NEXT: v_mov_b32_e32 v1, s7 7671; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7672; GFX7-NEXT: v_mov_b32_e32 v0, s4 7673; GFX7-NEXT: v_mov_b32_e32 v1, s5 7674; GFX7-NEXT: s_waitcnt vmcnt(0) 7675; GFX7-NEXT: flat_store_dword v[0:1], v2 7676; GFX7-NEXT: s_endpgm 7677; 7678; GFX10-WGP-LABEL: global_wavefront_release_acquire_ret_cmpxchg: 7679; GFX10-WGP: ; %bb.0: ; %entry 7680; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 7681; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7682; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 7683; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 7684; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7685; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7686; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 7687; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7688; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 7689; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7690; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 7691; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 7692; GFX10-WGP-NEXT: s_endpgm 7693; 7694; GFX10-CU-LABEL: global_wavefront_release_acquire_ret_cmpxchg: 7695; GFX10-CU: ; %bb.0: ; %entry 7696; GFX10-CU-NEXT: 
v_mov_b32_e32 v0, 0 7697; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7698; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 7699; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 7700; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 7701; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7702; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 7703; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7704; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 7705; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7706; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 7707; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 7708; GFX10-CU-NEXT: s_endpgm 7709; 7710; SKIP-CACHE-INV-LABEL: global_wavefront_release_acquire_ret_cmpxchg: 7711; SKIP-CACHE-INV: ; %bb.0: ; %entry 7712; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7713; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7714; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7715; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7716; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7717; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 7718; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 7719; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 7720; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 7721; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 7722; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 7723; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 7724; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7725; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 7726; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 7727; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7728; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 7729; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 7730; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7731; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7732; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 7733; 
SKIP-CACHE-INV-NEXT: s_endpgm 7734; 7735; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_release_acquire_ret_cmpxchg: 7736; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7737; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7738; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7739; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7740; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7741; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7742; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7743; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7744; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7745; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7746; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7747; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7748; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7749; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7750; 7751; GFX90A-TGSPLIT-LABEL: global_wavefront_release_acquire_ret_cmpxchg: 7752; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7753; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7754; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7755; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7756; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7757; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7758; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7759; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7760; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7761; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7762; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7763; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7764; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7765; GFX90A-TGSPLIT-NEXT: s_endpgm 7766; 7767; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_acquire_ret_cmpxchg: 7768; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7769; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7770; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 
s[0:1], s[4:5], 0x0 7771; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7772; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7773; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7774; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7775; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7776; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7777; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7778; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7779; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7780; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7781; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7782; 7783; GFX940-TGSPLIT-LABEL: global_wavefront_release_acquire_ret_cmpxchg: 7784; GFX940-TGSPLIT: ; %bb.0: ; %entry 7785; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7786; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7787; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7788; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7789; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7790; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7791; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 7792; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7793; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7794; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 7795; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7796; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 7797; GFX940-TGSPLIT-NEXT: s_endpgm 7798; 7799; GFX11-WGP-LABEL: global_wavefront_release_acquire_ret_cmpxchg: 7800; GFX11-WGP: ; %bb.0: ; %entry 7801; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 7802; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7803; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7804; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7805; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7806; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 7807; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 7808; GFX11-WGP-NEXT: ; 
kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7809; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 7810; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7811; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 7812; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7813; GFX11-WGP-NEXT: s_endpgm 7814; 7815; GFX11-CU-LABEL: global_wavefront_release_acquire_ret_cmpxchg: 7816; GFX11-CU: ; %bb.0: ; %entry 7817; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 7818; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7819; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7820; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7821; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7822; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 7823; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 7824; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7825; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 7826; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 7827; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 7828; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7829; GFX11-CU-NEXT: s_endpgm 7830; 7831; GFX12-WGP-LABEL: global_wavefront_release_acquire_ret_cmpxchg: 7832; GFX12-WGP: ; %bb.0: ; %entry 7833; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 7834; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7835; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7836; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7837; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7838; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 7839; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 7840; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7841; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 7842; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 7843; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 7844; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 7845; GFX12-WGP-NEXT: s_endpgm 7846; 7847; GFX12-CU-LABEL: global_wavefront_release_acquire_ret_cmpxchg: 7848; GFX12-CU: ; %bb.0: ; %entry 7849; GFX12-CU-NEXT: v_mov_b32_e32 
v0, 0 7850; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7851; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7852; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7853; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7854; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 7855; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 7856; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7857; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 7858; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 7859; GFX12-CU-NEXT: s_wait_loadcnt 0x0 7860; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 7861; GFX12-CU-NEXT: s_endpgm 7862 ptr addrspace(1) %out, i32 %in, i32 %old) { 7863entry: 7864 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 7865 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") release acquire 7866 %val0 = extractvalue { i32, i1 } %val, 0 7867 store i32 %val0, ptr addrspace(1) %out, align 4 7868 ret void 7869} 7870 7871define amdgpu_kernel void @global_wavefront_acq_rel_acquire_ret_cmpxchg( 7872; GFX6-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: 7873; GFX6: ; %bb.0: ; %entry 7874; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 7875; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7876; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 7877; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 7878; GFX6-NEXT: s_waitcnt lgkmcnt(0) 7879; GFX6-NEXT: s_mov_b32 s12, s5 7880; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 7881; GFX6-NEXT: s_mov_b32 s10, 0x100f000 7882; GFX6-NEXT: s_mov_b32 s11, -1 7883; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 7884; GFX6-NEXT: s_mov_b32 s5, s12 7885; GFX6-NEXT: s_mov_b32 s6, s11 7886; GFX6-NEXT: s_mov_b32 s7, s10 7887; GFX6-NEXT: v_mov_b32_e32 v0, s9 7888; GFX6-NEXT: v_mov_b32_e32 v2, s8 7889; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7890; GFX6-NEXT: v_mov_b32_e32 v1, v2 7891; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 
offset:16 glc 7892; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7893; GFX6-NEXT: s_waitcnt vmcnt(0) 7894; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 7895; GFX6-NEXT: s_endpgm 7896; 7897; GFX7-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: 7898; GFX7: ; %bb.0: ; %entry 7899; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 7900; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7901; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7902; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7903; GFX7-NEXT: s_mov_b64 s[12:13], 16 7904; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7905; GFX7-NEXT: s_mov_b32 s6, s4 7906; GFX7-NEXT: s_mov_b32 s7, s5 7907; GFX7-NEXT: s_mov_b32 s11, s12 7908; GFX7-NEXT: s_mov_b32 s10, s13 7909; GFX7-NEXT: s_add_u32 s6, s6, s11 7910; GFX7-NEXT: s_addc_u32 s10, s7, s10 7911; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7912; GFX7-NEXT: s_mov_b32 s7, s10 7913; GFX7-NEXT: v_mov_b32_e32 v2, s9 7914; GFX7-NEXT: v_mov_b32_e32 v0, s8 7915; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7916; GFX7-NEXT: v_mov_b32_e32 v3, v0 7917; GFX7-NEXT: v_mov_b32_e32 v0, s6 7918; GFX7-NEXT: v_mov_b32_e32 v1, s7 7919; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7920; GFX7-NEXT: v_mov_b32_e32 v0, s4 7921; GFX7-NEXT: v_mov_b32_e32 v1, s5 7922; GFX7-NEXT: s_waitcnt vmcnt(0) 7923; GFX7-NEXT: flat_store_dword v[0:1], v2 7924; GFX7-NEXT: s_endpgm 7925; 7926; GFX10-WGP-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: 7927; GFX10-WGP: ; %bb.0: ; %entry 7928; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 7929; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7930; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 7931; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 7932; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7933; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7934; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 7935; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7936; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 7937; 
GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7938; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 7939; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 7940; GFX10-WGP-NEXT: s_endpgm 7941; 7942; GFX10-CU-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: 7943; GFX10-CU: ; %bb.0: ; %entry 7944; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 7945; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7946; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 7947; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 7948; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 7949; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7950; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 7951; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 7952; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 7953; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 7954; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 7955; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 7956; GFX10-CU-NEXT: s_endpgm 7957; 7958; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: 7959; SKIP-CACHE-INV: ; %bb.0: ; %entry 7960; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7961; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7962; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7963; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7964; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7965; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 7966; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 7967; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 7968; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 7969; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 7970; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 7971; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 7972; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7973; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 7974; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 7975; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 7976; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 7977; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 7978; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 7979; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 7980; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 7981; SKIP-CACHE-INV-NEXT: s_endpgm 7982; 7983; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: 7984; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7985; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 7986; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7987; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7988; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7989; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7990; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7991; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 7992; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7993; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 7994; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 7995; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 7996; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 7997; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7998; 7999; GFX90A-TGSPLIT-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: 8000; GFX90A-TGSPLIT: ; %bb.0: ; %entry 8001; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8002; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8003; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8004; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8005; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8006; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8007; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8008; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8009; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8010; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8011; 
GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8012; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8013; GFX90A-TGSPLIT-NEXT: s_endpgm 8014; 8015; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: 8016; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 8017; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8018; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8019; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8020; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8021; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8022; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8023; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8024; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8025; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8026; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 8027; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8028; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8029; GFX940-NOTTGSPLIT-NEXT: s_endpgm 8030; 8031; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: 8032; GFX940-TGSPLIT: ; %bb.0: ; %entry 8033; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8034; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8035; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8036; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8037; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8038; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8039; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8040; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8041; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8042; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 8043; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8044; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8045; GFX940-TGSPLIT-NEXT: s_endpgm 8046; 8047; GFX11-WGP-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: 8048; GFX11-WGP: ; 
%bb.0: ; %entry 8049; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 8050; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8051; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8052; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8053; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 8054; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 8055; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 8056; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8057; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 8058; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8059; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 8060; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8061; GFX11-WGP-NEXT: s_endpgm 8062; 8063; GFX11-CU-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: 8064; GFX11-CU: ; %bb.0: ; %entry 8065; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 8066; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8067; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8068; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8069; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 8070; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 8071; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 8072; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8073; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 8074; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8075; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 8076; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8077; GFX11-CU-NEXT: s_endpgm 8078; 8079; GFX12-WGP-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: 8080; GFX12-WGP: ; %bb.0: ; %entry 8081; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 8082; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8083; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8084; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8085; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 8086; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 8087; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 8088; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8089; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 8090; 
GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 8091; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 8092; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8093; GFX12-WGP-NEXT: s_endpgm 8094; 8095; GFX12-CU-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: 8096; GFX12-CU: ; %bb.0: ; %entry 8097; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 8098; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8099; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8100; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8101; GFX12-CU-NEXT: s_wait_kmcnt 0x0 8102; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 8103; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 8104; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8105; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 8106; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 8107; GFX12-CU-NEXT: s_wait_loadcnt 0x0 8108; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8109; GFX12-CU-NEXT: s_endpgm 8110 ptr addrspace(1) %out, i32 %in, i32 %old) { 8111entry: 8112 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 8113 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire 8114 %val0 = extractvalue { i32, i1 } %val, 0 8115 store i32 %val0, ptr addrspace(1) %out, align 4 8116 ret void 8117} 8118 8119define amdgpu_kernel void @global_wavefront_seq_cst_acquire_ret_cmpxchg( 8120; GFX6-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: 8121; GFX6: ; %bb.0: ; %entry 8122; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 8123; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8124; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 8125; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 8126; GFX6-NEXT: s_waitcnt lgkmcnt(0) 8127; GFX6-NEXT: s_mov_b32 s12, s5 8128; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 8129; GFX6-NEXT: s_mov_b32 s10, 0x100f000 8130; GFX6-NEXT: s_mov_b32 s11, -1 8131; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 
8132; GFX6-NEXT: s_mov_b32 s5, s12 8133; GFX6-NEXT: s_mov_b32 s6, s11 8134; GFX6-NEXT: s_mov_b32 s7, s10 8135; GFX6-NEXT: v_mov_b32_e32 v0, s9 8136; GFX6-NEXT: v_mov_b32_e32 v2, s8 8137; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8138; GFX6-NEXT: v_mov_b32_e32 v1, v2 8139; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 8140; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8141; GFX6-NEXT: s_waitcnt vmcnt(0) 8142; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 8143; GFX6-NEXT: s_endpgm 8144; 8145; GFX7-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: 8146; GFX7: ; %bb.0: ; %entry 8147; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 8148; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8149; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 8150; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 8151; GFX7-NEXT: s_mov_b64 s[12:13], 16 8152; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8153; GFX7-NEXT: s_mov_b32 s6, s4 8154; GFX7-NEXT: s_mov_b32 s7, s5 8155; GFX7-NEXT: s_mov_b32 s11, s12 8156; GFX7-NEXT: s_mov_b32 s10, s13 8157; GFX7-NEXT: s_add_u32 s6, s6, s11 8158; GFX7-NEXT: s_addc_u32 s10, s7, s10 8159; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8160; GFX7-NEXT: s_mov_b32 s7, s10 8161; GFX7-NEXT: v_mov_b32_e32 v2, s9 8162; GFX7-NEXT: v_mov_b32_e32 v0, s8 8163; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8164; GFX7-NEXT: v_mov_b32_e32 v3, v0 8165; GFX7-NEXT: v_mov_b32_e32 v0, s6 8166; GFX7-NEXT: v_mov_b32_e32 v1, s7 8167; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8168; GFX7-NEXT: v_mov_b32_e32 v0, s4 8169; GFX7-NEXT: v_mov_b32_e32 v1, s5 8170; GFX7-NEXT: s_waitcnt vmcnt(0) 8171; GFX7-NEXT: flat_store_dword v[0:1], v2 8172; GFX7-NEXT: s_endpgm 8173; 8174; GFX10-WGP-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: 8175; GFX10-WGP: ; %bb.0: ; %entry 8176; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 8177; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8178; 
GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 8179; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 8180; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 8181; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 8182; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 8183; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8184; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 8185; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8186; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 8187; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 8188; GFX10-WGP-NEXT: s_endpgm 8189; 8190; GFX10-CU-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: 8191; GFX10-CU: ; %bb.0: ; %entry 8192; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 8193; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8194; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 8195; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 8196; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 8197; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 8198; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 8199; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8200; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 8201; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8202; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 8203; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 8204; GFX10-CU-NEXT: s_endpgm 8205; 8206; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: 8207; SKIP-CACHE-INV: ; %bb.0: ; %entry 8208; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 8209; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 8210; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 8211; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 8212; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 8213; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 8214; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 8215; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 8216; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 8217; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def 
$sgpr0_sgpr1_sgpr2_sgpr3 8218; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 8219; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 8220; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 8221; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 8222; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 8223; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8224; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 8225; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 8226; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8227; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8228; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 8229; SKIP-CACHE-INV-NEXT: s_endpgm 8230; 8231; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: 8232; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 8233; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8234; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8235; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8236; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8237; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8238; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8239; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8240; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8241; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8242; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8243; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8244; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8245; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 8246; 8247; GFX90A-TGSPLIT-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: 8248; GFX90A-TGSPLIT: ; %bb.0: ; %entry 8249; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8250; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8251; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8252; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8253; GFX90A-TGSPLIT-NEXT: s_waitcnt 
lgkmcnt(0) 8254; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8255; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8256; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8257; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8258; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8259; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8260; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8261; GFX90A-TGSPLIT-NEXT: s_endpgm 8262; 8263; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: 8264; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 8265; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8266; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8267; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8268; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8269; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8270; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8271; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8272; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8273; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8274; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 8275; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8276; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8277; GFX940-NOTTGSPLIT-NEXT: s_endpgm 8278; 8279; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: 8280; GFX940-TGSPLIT: ; %bb.0: ; %entry 8281; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8282; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8283; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8284; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8285; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8286; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8287; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8288; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8289; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, 
v1 8290; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 8291; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8292; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8293; GFX940-TGSPLIT-NEXT: s_endpgm 8294; 8295; GFX11-WGP-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: 8296; GFX11-WGP: ; %bb.0: ; %entry 8297; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 8298; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8299; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8300; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8301; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 8302; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 8303; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 8304; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8305; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 8306; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8307; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 8308; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8309; GFX11-WGP-NEXT: s_endpgm 8310; 8311; GFX11-CU-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: 8312; GFX11-CU: ; %bb.0: ; %entry 8313; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 8314; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8315; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8316; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8317; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 8318; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 8319; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 8320; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8321; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 8322; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8323; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 8324; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8325; GFX11-CU-NEXT: s_endpgm 8326; 8327; GFX12-WGP-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: 8328; GFX12-WGP: ; %bb.0: ; %entry 8329; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 8330; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8331; GFX12-WGP-NEXT: 
s_load_b32 s3, s[4:5], 0x8 8332; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8333; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 8334; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 8335; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 8336; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8337; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 8338; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 8339; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 8340; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8341; GFX12-WGP-NEXT: s_endpgm 8342; 8343; GFX12-CU-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: 8344; GFX12-CU: ; %bb.0: ; %entry 8345; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 8346; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8347; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8348; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8349; GFX12-CU-NEXT: s_wait_kmcnt 0x0 8350; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 8351; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 8352; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8353; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 8354; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 8355; GFX12-CU-NEXT: s_wait_loadcnt 0x0 8356; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8357; GFX12-CU-NEXT: s_endpgm 8358 ptr addrspace(1) %out, i32 %in, i32 %old) { 8359entry: 8360 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 8361 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire 8362 %val0 = extractvalue { i32, i1 } %val, 0 8363 store i32 %val0, ptr addrspace(1) %out, align 4 8364 ret void 8365} 8366 8367define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_ret_cmpxchg( 8368; GFX6-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: 8369; GFX6: ; %bb.0: ; %entry 8370; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 8371; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8372; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 8373; 
GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 8374; GFX6-NEXT: s_waitcnt lgkmcnt(0) 8375; GFX6-NEXT: s_mov_b32 s12, s5 8376; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 8377; GFX6-NEXT: s_mov_b32 s10, 0x100f000 8378; GFX6-NEXT: s_mov_b32 s11, -1 8379; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 8380; GFX6-NEXT: s_mov_b32 s5, s12 8381; GFX6-NEXT: s_mov_b32 s6, s11 8382; GFX6-NEXT: s_mov_b32 s7, s10 8383; GFX6-NEXT: v_mov_b32_e32 v0, s9 8384; GFX6-NEXT: v_mov_b32_e32 v2, s8 8385; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8386; GFX6-NEXT: v_mov_b32_e32 v1, v2 8387; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 8388; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8389; GFX6-NEXT: s_waitcnt vmcnt(0) 8390; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 8391; GFX6-NEXT: s_endpgm 8392; 8393; GFX7-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: 8394; GFX7: ; %bb.0: ; %entry 8395; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 8396; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8397; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 8398; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 8399; GFX7-NEXT: s_mov_b64 s[12:13], 16 8400; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8401; GFX7-NEXT: s_mov_b32 s6, s4 8402; GFX7-NEXT: s_mov_b32 s7, s5 8403; GFX7-NEXT: s_mov_b32 s11, s12 8404; GFX7-NEXT: s_mov_b32 s10, s13 8405; GFX7-NEXT: s_add_u32 s6, s6, s11 8406; GFX7-NEXT: s_addc_u32 s10, s7, s10 8407; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8408; GFX7-NEXT: s_mov_b32 s7, s10 8409; GFX7-NEXT: v_mov_b32_e32 v2, s9 8410; GFX7-NEXT: v_mov_b32_e32 v0, s8 8411; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8412; GFX7-NEXT: v_mov_b32_e32 v3, v0 8413; GFX7-NEXT: v_mov_b32_e32 v0, s6 8414; GFX7-NEXT: v_mov_b32_e32 v1, s7 8415; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8416; GFX7-NEXT: v_mov_b32_e32 v0, s4 8417; GFX7-NEXT: 
v_mov_b32_e32 v1, s5 8418; GFX7-NEXT: s_waitcnt vmcnt(0) 8419; GFX7-NEXT: flat_store_dword v[0:1], v2 8420; GFX7-NEXT: s_endpgm 8421; 8422; GFX10-WGP-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: 8423; GFX10-WGP: ; %bb.0: ; %entry 8424; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 8425; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8426; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 8427; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 8428; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 8429; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 8430; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 8431; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8432; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 8433; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8434; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 8435; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 8436; GFX10-WGP-NEXT: s_endpgm 8437; 8438; GFX10-CU-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: 8439; GFX10-CU: ; %bb.0: ; %entry 8440; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 8441; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8442; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 8443; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 8444; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 8445; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 8446; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 8447; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8448; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 8449; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8450; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 8451; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 8452; GFX10-CU-NEXT: s_endpgm 8453; 8454; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: 8455; SKIP-CACHE-INV: ; %bb.0: ; %entry 8456; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 8457; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 8458; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 8459; SKIP-CACHE-INV-NEXT: 
s_load_dword s4, s[2:3], 0x3 8460; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 8461; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 8462; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 8463; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 8464; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 8465; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 8466; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 8467; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 8468; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 8469; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 8470; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 8471; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8472; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 8473; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 8474; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8475; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8476; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 8477; SKIP-CACHE-INV-NEXT: s_endpgm 8478; 8479; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: 8480; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 8481; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8482; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8483; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8484; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8485; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8486; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8487; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8488; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8489; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8490; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8491; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8492; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8493; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 8494; 8495; 
GFX90A-TGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: 8496; GFX90A-TGSPLIT: ; %bb.0: ; %entry 8497; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8498; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8499; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8500; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8501; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8502; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8503; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8504; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8505; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8506; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8507; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8508; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8509; GFX90A-TGSPLIT-NEXT: s_endpgm 8510; 8511; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: 8512; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 8513; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8514; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8515; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8516; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8517; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8518; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8519; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8520; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8521; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8522; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 8523; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8524; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8525; GFX940-NOTTGSPLIT-NEXT: s_endpgm 8526; 8527; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: 8528; GFX940-TGSPLIT: ; %bb.0: ; %entry 8529; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8530; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8531; 
GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8532; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8533; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8534; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8535; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8536; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8537; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8538; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 8539; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8540; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8541; GFX940-TGSPLIT-NEXT: s_endpgm 8542; 8543; GFX11-WGP-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: 8544; GFX11-WGP: ; %bb.0: ; %entry 8545; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 8546; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8547; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8548; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8549; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 8550; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 8551; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 8552; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8553; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 8554; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8555; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 8556; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8557; GFX11-WGP-NEXT: s_endpgm 8558; 8559; GFX11-CU-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: 8560; GFX11-CU: ; %bb.0: ; %entry 8561; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 8562; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8563; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8564; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8565; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 8566; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 8567; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 8568; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8569; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 8570; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, 
v0, v[1:2], s[0:1] offset:16 glc 8571; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 8572; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8573; GFX11-CU-NEXT: s_endpgm 8574; 8575; GFX12-WGP-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: 8576; GFX12-WGP: ; %bb.0: ; %entry 8577; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 8578; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8579; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8580; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8581; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 8582; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 8583; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 8584; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8585; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 8586; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 8587; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 8588; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8589; GFX12-WGP-NEXT: s_endpgm 8590; 8591; GFX12-CU-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: 8592; GFX12-CU: ; %bb.0: ; %entry 8593; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 8594; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8595; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8596; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8597; GFX12-CU-NEXT: s_wait_kmcnt 0x0 8598; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 8599; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 8600; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8601; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 8602; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 8603; GFX12-CU-NEXT: s_wait_loadcnt 0x0 8604; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8605; GFX12-CU-NEXT: s_endpgm 8606 ptr addrspace(1) %out, i32 %in, i32 %old) { 8607entry: 8608 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 8609 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst 8610 %val0 = extractvalue { i32, i1 } %val, 0 8611 store 
i32 %val0, ptr addrspace(1) %out, align 4 8612 ret void 8613} 8614 8615define amdgpu_kernel void @global_wavefront_acquire_seq_cst_ret_cmpxchg( 8616; GFX6-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: 8617; GFX6: ; %bb.0: ; %entry 8618; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 8619; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8620; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 8621; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 8622; GFX6-NEXT: s_waitcnt lgkmcnt(0) 8623; GFX6-NEXT: s_mov_b32 s12, s5 8624; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 8625; GFX6-NEXT: s_mov_b32 s10, 0x100f000 8626; GFX6-NEXT: s_mov_b32 s11, -1 8627; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 8628; GFX6-NEXT: s_mov_b32 s5, s12 8629; GFX6-NEXT: s_mov_b32 s6, s11 8630; GFX6-NEXT: s_mov_b32 s7, s10 8631; GFX6-NEXT: v_mov_b32_e32 v0, s9 8632; GFX6-NEXT: v_mov_b32_e32 v2, s8 8633; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8634; GFX6-NEXT: v_mov_b32_e32 v1, v2 8635; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 8636; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8637; GFX6-NEXT: s_waitcnt vmcnt(0) 8638; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 8639; GFX6-NEXT: s_endpgm 8640; 8641; GFX7-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: 8642; GFX7: ; %bb.0: ; %entry 8643; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 8644; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8645; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 8646; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 8647; GFX7-NEXT: s_mov_b64 s[12:13], 16 8648; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8649; GFX7-NEXT: s_mov_b32 s6, s4 8650; GFX7-NEXT: s_mov_b32 s7, s5 8651; GFX7-NEXT: s_mov_b32 s11, s12 8652; GFX7-NEXT: s_mov_b32 s10, s13 8653; GFX7-NEXT: s_add_u32 s6, s6, s11 8654; GFX7-NEXT: s_addc_u32 s10, s7, s10 8655; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8656; GFX7-NEXT: s_mov_b32 s7, s10 8657; 
GFX7-NEXT: v_mov_b32_e32 v2, s9 8658; GFX7-NEXT: v_mov_b32_e32 v0, s8 8659; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8660; GFX7-NEXT: v_mov_b32_e32 v3, v0 8661; GFX7-NEXT: v_mov_b32_e32 v0, s6 8662; GFX7-NEXT: v_mov_b32_e32 v1, s7 8663; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8664; GFX7-NEXT: v_mov_b32_e32 v0, s4 8665; GFX7-NEXT: v_mov_b32_e32 v1, s5 8666; GFX7-NEXT: s_waitcnt vmcnt(0) 8667; GFX7-NEXT: flat_store_dword v[0:1], v2 8668; GFX7-NEXT: s_endpgm 8669; 8670; GFX10-WGP-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: 8671; GFX10-WGP: ; %bb.0: ; %entry 8672; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 8673; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8674; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 8675; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 8676; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 8677; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 8678; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 8679; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8680; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 8681; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8682; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 8683; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 8684; GFX10-WGP-NEXT: s_endpgm 8685; 8686; GFX10-CU-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: 8687; GFX10-CU: ; %bb.0: ; %entry 8688; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 8689; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8690; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 8691; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 8692; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 8693; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 8694; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 8695; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8696; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 8697; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8698; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 8699; GFX10-CU-NEXT: 
global_store_dword v0, v1, s[4:5] 8700; GFX10-CU-NEXT: s_endpgm 8701; 8702; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: 8703; SKIP-CACHE-INV: ; %bb.0: ; %entry 8704; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 8705; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 8706; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 8707; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 8708; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 8709; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 8710; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 8711; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 8712; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 8713; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 8714; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 8715; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 8716; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 8717; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 8718; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 8719; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8720; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 8721; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 8722; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8723; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8724; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 8725; SKIP-CACHE-INV-NEXT: s_endpgm 8726; 8727; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: 8728; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 8729; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8730; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8731; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8732; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8733; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8734; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8735; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8736; GFX90A-NOTTGSPLIT-NEXT: ; 
kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8737; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8738; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8739; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8740; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8741; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 8742; 8743; GFX90A-TGSPLIT-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: 8744; GFX90A-TGSPLIT: ; %bb.0: ; %entry 8745; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8746; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8747; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8748; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8749; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8750; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8751; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8752; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8753; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8754; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8755; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8756; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8757; GFX90A-TGSPLIT-NEXT: s_endpgm 8758; 8759; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: 8760; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 8761; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8762; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8763; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8764; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8765; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8766; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8767; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8768; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8769; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8770; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 8771; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 
8772; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8773; GFX940-NOTTGSPLIT-NEXT: s_endpgm 8774; 8775; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: 8776; GFX940-TGSPLIT: ; %bb.0: ; %entry 8777; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8778; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8779; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8780; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8781; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8782; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8783; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 8784; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8785; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8786; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 8787; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8788; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 8789; GFX940-TGSPLIT-NEXT: s_endpgm 8790; 8791; GFX11-WGP-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: 8792; GFX11-WGP: ; %bb.0: ; %entry 8793; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 8794; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8795; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8796; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8797; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 8798; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 8799; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 8800; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8801; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 8802; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8803; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 8804; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8805; GFX11-WGP-NEXT: s_endpgm 8806; 8807; GFX11-CU-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: 8808; GFX11-CU: ; %bb.0: ; %entry 8809; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 8810; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8811; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8812; 
GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8813; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 8814; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 8815; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 8816; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8817; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 8818; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 8819; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 8820; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 8821; GFX11-CU-NEXT: s_endpgm 8822; 8823; GFX12-WGP-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: 8824; GFX12-WGP: ; %bb.0: ; %entry 8825; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 8826; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8827; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8828; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8829; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 8830; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 8831; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 8832; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8833; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 8834; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 8835; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 8836; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 8837; GFX12-WGP-NEXT: s_endpgm 8838; 8839; GFX12-CU-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: 8840; GFX12-CU: ; %bb.0: ; %entry 8841; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 8842; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8843; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8844; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8845; GFX12-CU-NEXT: s_wait_kmcnt 0x0 8846; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 8847; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 8848; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8849; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 8850; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 8851; GFX12-CU-NEXT: s_wait_loadcnt 0x0 8852; GFX12-CU-NEXT: 
global_store_b32 v0, v1, s[0:1] 8853; GFX12-CU-NEXT: s_endpgm 8854 ptr addrspace(1) %out, i32 %in, i32 %old) { 8855entry: 8856 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 8857 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst 8858 %val0 = extractvalue { i32, i1 } %val, 0 8859 store i32 %val0, ptr addrspace(1) %out, align 4 8860 ret void 8861} 8862 8863define amdgpu_kernel void @global_wavefront_release_seq_cst_ret_cmpxchg( 8864; GFX6-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: 8865; GFX6: ; %bb.0: ; %entry 8866; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 8867; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8868; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 8869; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 8870; GFX6-NEXT: s_waitcnt lgkmcnt(0) 8871; GFX6-NEXT: s_mov_b32 s12, s5 8872; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 8873; GFX6-NEXT: s_mov_b32 s10, 0x100f000 8874; GFX6-NEXT: s_mov_b32 s11, -1 8875; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 8876; GFX6-NEXT: s_mov_b32 s5, s12 8877; GFX6-NEXT: s_mov_b32 s6, s11 8878; GFX6-NEXT: s_mov_b32 s7, s10 8879; GFX6-NEXT: v_mov_b32_e32 v0, s9 8880; GFX6-NEXT: v_mov_b32_e32 v2, s8 8881; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8882; GFX6-NEXT: v_mov_b32_e32 v1, v2 8883; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 8884; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8885; GFX6-NEXT: s_waitcnt vmcnt(0) 8886; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 8887; GFX6-NEXT: s_endpgm 8888; 8889; GFX7-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: 8890; GFX7: ; %bb.0: ; %entry 8891; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 8892; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8893; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 8894; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 8895; GFX7-NEXT: s_mov_b64 s[12:13], 16 8896; GFX7-NEXT: s_waitcnt 
lgkmcnt(0) 8897; GFX7-NEXT: s_mov_b32 s6, s4 8898; GFX7-NEXT: s_mov_b32 s7, s5 8899; GFX7-NEXT: s_mov_b32 s11, s12 8900; GFX7-NEXT: s_mov_b32 s10, s13 8901; GFX7-NEXT: s_add_u32 s6, s6, s11 8902; GFX7-NEXT: s_addc_u32 s10, s7, s10 8903; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8904; GFX7-NEXT: s_mov_b32 s7, s10 8905; GFX7-NEXT: v_mov_b32_e32 v2, s9 8906; GFX7-NEXT: v_mov_b32_e32 v0, s8 8907; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8908; GFX7-NEXT: v_mov_b32_e32 v3, v0 8909; GFX7-NEXT: v_mov_b32_e32 v0, s6 8910; GFX7-NEXT: v_mov_b32_e32 v1, s7 8911; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8912; GFX7-NEXT: v_mov_b32_e32 v0, s4 8913; GFX7-NEXT: v_mov_b32_e32 v1, s5 8914; GFX7-NEXT: s_waitcnt vmcnt(0) 8915; GFX7-NEXT: flat_store_dword v[0:1], v2 8916; GFX7-NEXT: s_endpgm 8917; 8918; GFX10-WGP-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: 8919; GFX10-WGP: ; %bb.0: ; %entry 8920; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 8921; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8922; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 8923; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 8924; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 8925; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 8926; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 8927; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8928; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 8929; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8930; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 8931; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 8932; GFX10-WGP-NEXT: s_endpgm 8933; 8934; GFX10-CU-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: 8935; GFX10-CU: ; %bb.0: ; %entry 8936; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 8937; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8938; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 8939; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 8940; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 8941; GFX10-CU-NEXT: 
v_mov_b32_e32 v1, s7 8942; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 8943; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 8944; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 8945; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 8946; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 8947; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 8948; GFX10-CU-NEXT: s_endpgm 8949; 8950; SKIP-CACHE-INV-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: 8951; SKIP-CACHE-INV: ; %bb.0: ; %entry 8952; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 8953; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 8954; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 8955; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 8956; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 8957; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 8958; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 8959; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 8960; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 8961; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 8962; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 8963; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 8964; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 8965; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 8966; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 8967; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 8968; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 8969; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 8970; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 8971; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 8972; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 8973; SKIP-CACHE-INV-NEXT: s_endpgm 8974; 8975; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: 8976; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 8977; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8978; GFX90A-NOTTGSPLIT-NEXT: 
s_load_dwordx2 s[4:5], s[8:9], 0x0 8979; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8980; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8981; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8982; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8983; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 8984; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8985; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 8986; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 8987; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 8988; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 8989; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 8990; 8991; GFX90A-TGSPLIT-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: 8992; GFX90A-TGSPLIT: ; %bb.0: ; %entry 8993; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 8994; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8995; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8996; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8997; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8998; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8999; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9000; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9001; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9002; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9003; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9004; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9005; GFX90A-TGSPLIT-NEXT: s_endpgm 9006; 9007; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: 9008; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 9009; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9010; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9011; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9012; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9013; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9014; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 
v2, s3 9015; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9016; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9017; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9018; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 9019; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9020; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9021; GFX940-NOTTGSPLIT-NEXT: s_endpgm 9022; 9023; GFX940-TGSPLIT-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: 9024; GFX940-TGSPLIT: ; %bb.0: ; %entry 9025; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9026; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9027; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9028; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9029; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9030; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9031; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9032; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9033; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9034; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 9035; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9036; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9037; GFX940-TGSPLIT-NEXT: s_endpgm 9038; 9039; GFX11-WGP-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: 9040; GFX11-WGP: ; %bb.0: ; %entry 9041; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 9042; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9043; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9044; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9045; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 9046; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 9047; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 9048; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9049; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 9050; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9051; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 
9052; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9053; GFX11-WGP-NEXT: s_endpgm 9054; 9055; GFX11-CU-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: 9056; GFX11-CU: ; %bb.0: ; %entry 9057; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 9058; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9059; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9060; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9061; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9062; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 9063; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 9064; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9065; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 9066; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9067; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 9068; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9069; GFX11-CU-NEXT: s_endpgm 9070; 9071; GFX12-WGP-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: 9072; GFX12-WGP: ; %bb.0: ; %entry 9073; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 9074; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9075; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9076; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9077; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9078; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 9079; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 9080; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9081; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 9082; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 9083; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 9084; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9085; GFX12-WGP-NEXT: s_endpgm 9086; 9087; GFX12-CU-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: 9088; GFX12-CU: ; %bb.0: ; %entry 9089; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 9090; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9091; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9092; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9093; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9094; GFX12-CU-NEXT: v_mov_b32_e32 v1, 
s3 9095; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 9096; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9097; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 9098; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 9099; GFX12-CU-NEXT: s_wait_loadcnt 0x0 9100; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9101; GFX12-CU-NEXT: s_endpgm 9102 ptr addrspace(1) %out, i32 %in, i32 %old) { 9103entry: 9104 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 9105 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst 9106 %val0 = extractvalue { i32, i1 } %val, 0 9107 store i32 %val0, ptr addrspace(1) %out, align 4 9108 ret void 9109} 9110 9111define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_ret_cmpxchg( 9112; GFX6-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: 9113; GFX6: ; %bb.0: ; %entry 9114; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 9115; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9116; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 9117; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 9118; GFX6-NEXT: s_waitcnt lgkmcnt(0) 9119; GFX6-NEXT: s_mov_b32 s12, s5 9120; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 9121; GFX6-NEXT: s_mov_b32 s10, 0x100f000 9122; GFX6-NEXT: s_mov_b32 s11, -1 9123; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9124; GFX6-NEXT: s_mov_b32 s5, s12 9125; GFX6-NEXT: s_mov_b32 s6, s11 9126; GFX6-NEXT: s_mov_b32 s7, s10 9127; GFX6-NEXT: v_mov_b32_e32 v0, s9 9128; GFX6-NEXT: v_mov_b32_e32 v2, s8 9129; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9130; GFX6-NEXT: v_mov_b32_e32 v1, v2 9131; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 9132; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9133; GFX6-NEXT: s_waitcnt vmcnt(0) 9134; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 9135; GFX6-NEXT: s_endpgm 9136; 9137; 
GFX7-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: 9138; GFX7: ; %bb.0: ; %entry 9139; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 9140; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9141; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 9142; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 9143; GFX7-NEXT: s_mov_b64 s[12:13], 16 9144; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9145; GFX7-NEXT: s_mov_b32 s6, s4 9146; GFX7-NEXT: s_mov_b32 s7, s5 9147; GFX7-NEXT: s_mov_b32 s11, s12 9148; GFX7-NEXT: s_mov_b32 s10, s13 9149; GFX7-NEXT: s_add_u32 s6, s6, s11 9150; GFX7-NEXT: s_addc_u32 s10, s7, s10 9151; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9152; GFX7-NEXT: s_mov_b32 s7, s10 9153; GFX7-NEXT: v_mov_b32_e32 v2, s9 9154; GFX7-NEXT: v_mov_b32_e32 v0, s8 9155; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9156; GFX7-NEXT: v_mov_b32_e32 v3, v0 9157; GFX7-NEXT: v_mov_b32_e32 v0, s6 9158; GFX7-NEXT: v_mov_b32_e32 v1, s7 9159; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9160; GFX7-NEXT: v_mov_b32_e32 v0, s4 9161; GFX7-NEXT: v_mov_b32_e32 v1, s5 9162; GFX7-NEXT: s_waitcnt vmcnt(0) 9163; GFX7-NEXT: flat_store_dword v[0:1], v2 9164; GFX7-NEXT: s_endpgm 9165; 9166; GFX10-WGP-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: 9167; GFX10-WGP: ; %bb.0: ; %entry 9168; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 9169; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9170; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 9171; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 9172; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9173; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 9174; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 9175; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9176; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 9177; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9178; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 9179; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 9180; GFX10-WGP-NEXT: s_endpgm 9181; 9182; GFX10-CU-LABEL: 
global_wavefront_acq_rel_seq_cst_ret_cmpxchg: 9183; GFX10-CU: ; %bb.0: ; %entry 9184; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 9185; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9186; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 9187; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 9188; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 9189; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 9190; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 9191; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9192; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 9193; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9194; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 9195; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 9196; GFX10-CU-NEXT: s_endpgm 9197; 9198; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: 9199; SKIP-CACHE-INV: ; %bb.0: ; %entry 9200; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 9201; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 9202; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 9203; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 9204; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 9205; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 9206; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 9207; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 9208; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 9209; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 9210; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 9211; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 9212; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 9213; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 9214; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 9215; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9216; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 9217; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 9218; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9219; 
SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9220; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 9221; SKIP-CACHE-INV-NEXT: s_endpgm 9222; 9223; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: 9224; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 9225; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9226; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9227; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9228; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9229; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9230; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9231; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9232; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9233; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9234; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9235; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9236; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9237; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 9238; 9239; GFX90A-TGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: 9240; GFX90A-TGSPLIT: ; %bb.0: ; %entry 9241; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9242; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9243; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9244; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9245; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9246; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9247; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9248; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9249; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9250; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9251; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9252; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9253; GFX90A-TGSPLIT-NEXT: s_endpgm 9254; 9255; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: 9256; GFX940-NOTTGSPLIT: ; 
%bb.0: ; %entry 9257; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9258; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9259; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9260; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9261; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9262; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9263; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9264; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9265; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9266; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 9267; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9268; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9269; GFX940-NOTTGSPLIT-NEXT: s_endpgm 9270; 9271; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: 9272; GFX940-TGSPLIT: ; %bb.0: ; %entry 9273; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9274; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9275; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9276; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9277; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9278; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9279; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9280; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9281; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9282; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 9283; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9284; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9285; GFX940-TGSPLIT-NEXT: s_endpgm 9286; 9287; GFX11-WGP-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: 9288; GFX11-WGP: ; %bb.0: ; %entry 9289; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 9290; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9291; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9292; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9293; GFX11-WGP-NEXT: s_waitcnt 
lgkmcnt(0) 9294; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 9295; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 9296; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9297; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 9298; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9299; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 9300; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9301; GFX11-WGP-NEXT: s_endpgm 9302; 9303; GFX11-CU-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: 9304; GFX11-CU: ; %bb.0: ; %entry 9305; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 9306; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9307; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9308; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9309; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9310; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 9311; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 9312; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9313; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 9314; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9315; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 9316; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9317; GFX11-CU-NEXT: s_endpgm 9318; 9319; GFX12-WGP-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: 9320; GFX12-WGP: ; %bb.0: ; %entry 9321; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 9322; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9323; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9324; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9325; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9326; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 9327; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 9328; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9329; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 9330; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 9331; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 9332; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9333; GFX12-WGP-NEXT: s_endpgm 9334; 9335; 
GFX12-CU-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: 9336; GFX12-CU: ; %bb.0: ; %entry 9337; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 9338; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9339; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9340; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9341; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9342; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 9343; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 9344; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9345; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 9346; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 9347; GFX12-CU-NEXT: s_wait_loadcnt 0x0 9348; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9349; GFX12-CU-NEXT: s_endpgm 9350 ptr addrspace(1) %out, i32 %in, i32 %old) { 9351entry: 9352 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 9353 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst 9354 %val0 = extractvalue { i32, i1 } %val, 0 9355 store i32 %val0, ptr addrspace(1) %out, align 4 9356 ret void 9357} 9358 9359define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_ret_cmpxchg( 9360; GFX6-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: 9361; GFX6: ; %bb.0: ; %entry 9362; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 9363; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9364; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 9365; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 9366; GFX6-NEXT: s_waitcnt lgkmcnt(0) 9367; GFX6-NEXT: s_mov_b32 s12, s5 9368; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 9369; GFX6-NEXT: s_mov_b32 s10, 0x100f000 9370; GFX6-NEXT: s_mov_b32 s11, -1 9371; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9372; GFX6-NEXT: s_mov_b32 s5, s12 9373; GFX6-NEXT: s_mov_b32 s6, s11 9374; GFX6-NEXT: s_mov_b32 s7, s10 9375; GFX6-NEXT: v_mov_b32_e32 v0, s9 9376; GFX6-NEXT: v_mov_b32_e32 v2, s8 9377; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def 
$vgpr0_vgpr1 killed $exec 9378; GFX6-NEXT: v_mov_b32_e32 v1, v2 9379; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 9380; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9381; GFX6-NEXT: s_waitcnt vmcnt(0) 9382; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 9383; GFX6-NEXT: s_endpgm 9384; 9385; GFX7-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: 9386; GFX7: ; %bb.0: ; %entry 9387; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 9388; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9389; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 9390; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 9391; GFX7-NEXT: s_mov_b64 s[12:13], 16 9392; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9393; GFX7-NEXT: s_mov_b32 s6, s4 9394; GFX7-NEXT: s_mov_b32 s7, s5 9395; GFX7-NEXT: s_mov_b32 s11, s12 9396; GFX7-NEXT: s_mov_b32 s10, s13 9397; GFX7-NEXT: s_add_u32 s6, s6, s11 9398; GFX7-NEXT: s_addc_u32 s10, s7, s10 9399; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9400; GFX7-NEXT: s_mov_b32 s7, s10 9401; GFX7-NEXT: v_mov_b32_e32 v2, s9 9402; GFX7-NEXT: v_mov_b32_e32 v0, s8 9403; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9404; GFX7-NEXT: v_mov_b32_e32 v3, v0 9405; GFX7-NEXT: v_mov_b32_e32 v0, s6 9406; GFX7-NEXT: v_mov_b32_e32 v1, s7 9407; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9408; GFX7-NEXT: v_mov_b32_e32 v0, s4 9409; GFX7-NEXT: v_mov_b32_e32 v1, s5 9410; GFX7-NEXT: s_waitcnt vmcnt(0) 9411; GFX7-NEXT: flat_store_dword v[0:1], v2 9412; GFX7-NEXT: s_endpgm 9413; 9414; GFX10-WGP-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: 9415; GFX10-WGP: ; %bb.0: ; %entry 9416; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 9417; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9418; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 9419; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 9420; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9421; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 9422; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 9423; 
GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9424; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 9425; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9426; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 9427; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 9428; GFX10-WGP-NEXT: s_endpgm 9429; 9430; GFX10-CU-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: 9431; GFX10-CU: ; %bb.0: ; %entry 9432; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 9433; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9434; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 9435; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 9436; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 9437; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 9438; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 9439; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9440; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 9441; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 9442; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 9443; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 9444; GFX10-CU-NEXT: s_endpgm 9445; 9446; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: 9447; SKIP-CACHE-INV: ; %bb.0: ; %entry 9448; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 9449; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 9450; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 9451; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 9452; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 9453; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 9454; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 9455; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 9456; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 9457; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 9458; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 9459; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 9460; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 9461; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 9462; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v2, s4 9463; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 9464; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 9465; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 9466; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 9467; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9468; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 9469; SKIP-CACHE-INV-NEXT: s_endpgm 9470; 9471; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: 9472; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 9473; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9474; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9475; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9476; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9477; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9478; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9479; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9480; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9481; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9482; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9483; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9484; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9485; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 9486; 9487; GFX90A-TGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: 9488; GFX90A-TGSPLIT: ; %bb.0: ; %entry 9489; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9490; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9491; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9492; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9493; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9494; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9495; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 9496; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9497; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 
v3, v1 9498; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 9499; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9500; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9501; GFX90A-TGSPLIT-NEXT: s_endpgm 9502; 9503; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: 9504; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 9505; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9506; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9507; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9508; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9509; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9510; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9511; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9512; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9513; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9514; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 9515; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9516; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9517; GFX940-NOTTGSPLIT-NEXT: s_endpgm 9518; 9519; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: 9520; GFX940-TGSPLIT: ; %bb.0: ; %entry 9521; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9522; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9523; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9524; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9525; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9526; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9527; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 9528; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9529; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 9530; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 9531; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9532; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9533; GFX940-TGSPLIT-NEXT: 
s_endpgm 9534; 9535; GFX11-WGP-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: 9536; GFX11-WGP: ; %bb.0: ; %entry 9537; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 9538; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9539; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9540; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9541; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 9542; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 9543; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 9544; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9545; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 9546; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9547; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 9548; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9549; GFX11-WGP-NEXT: s_endpgm 9550; 9551; GFX11-CU-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: 9552; GFX11-CU: ; %bb.0: ; %entry 9553; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 9554; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9555; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9556; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9557; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9558; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 9559; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 9560; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9561; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 9562; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 9563; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 9564; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9565; GFX11-CU-NEXT: s_endpgm 9566; 9567; GFX12-WGP-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: 9568; GFX12-WGP: ; %bb.0: ; %entry 9569; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 9570; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9571; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9572; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9573; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9574; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 9575; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 9576; GFX12-WGP-NEXT: ; kill: def 
$vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9577; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 9578; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 9579; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 9580; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9581; GFX12-WGP-NEXT: s_endpgm 9582; 9583; GFX12-CU-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: 9584; GFX12-CU: ; %bb.0: ; %entry 9585; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 9586; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9587; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9588; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9589; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9590; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 9591; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 9592; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 9593; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 9594; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 9595; GFX12-CU-NEXT: s_wait_loadcnt 0x0 9596; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9597; GFX12-CU-NEXT: s_endpgm 9598 ptr addrspace(1) %out, i32 %in, i32 %old) { 9599entry: 9600 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 9601 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst 9602 %val0 = extractvalue { i32, i1 } %val, 0 9603 store i32 %val0, ptr addrspace(1) %out, align 4 9604 ret void 9605} 9606 9607define amdgpu_kernel void @global_wavefront_one_as_unordered_load( 9608; GFX6-LABEL: global_wavefront_one_as_unordered_load: 9609; GFX6: ; %bb.0: ; %entry 9610; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 9611; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 9612; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 9613; GFX6-NEXT: s_waitcnt lgkmcnt(0) 9614; GFX6-NEXT: s_mov_b32 s6, s9 9615; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 9616; GFX6-NEXT: s_mov_b32 s12, 0x100f000 9617; GFX6-NEXT: s_mov_b32 s13, -1 9618; GFX6-NEXT: ; kill: def 
$sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 9619; GFX6-NEXT: s_mov_b32 s9, s6 9620; GFX6-NEXT: s_mov_b32 s10, s13 9621; GFX6-NEXT: s_mov_b32 s11, s12 9622; GFX6-NEXT: s_mov_b32 s14, s5 9623; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 9624; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9625; GFX6-NEXT: s_mov_b32 s5, s14 9626; GFX6-NEXT: s_mov_b32 s6, s13 9627; GFX6-NEXT: s_mov_b32 s7, s12 9628; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 9629; GFX6-NEXT: s_waitcnt vmcnt(0) 9630; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 9631; GFX6-NEXT: s_endpgm 9632; 9633; GFX7-LABEL: global_wavefront_one_as_unordered_load: 9634; GFX7: ; %bb.0: ; %entry 9635; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 9636; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 9637; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9638; GFX7-NEXT: v_mov_b32_e32 v0, s6 9639; GFX7-NEXT: v_mov_b32_e32 v1, s7 9640; GFX7-NEXT: flat_load_dword v2, v[0:1] 9641; GFX7-NEXT: v_mov_b32_e32 v0, s4 9642; GFX7-NEXT: v_mov_b32_e32 v1, s5 9643; GFX7-NEXT: s_waitcnt vmcnt(0) 9644; GFX7-NEXT: flat_store_dword v[0:1], v2 9645; GFX7-NEXT: s_endpgm 9646; 9647; GFX10-WGP-LABEL: global_wavefront_one_as_unordered_load: 9648; GFX10-WGP: ; %bb.0: ; %entry 9649; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 9650; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 9651; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 9652; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9653; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] 9654; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 9655; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 9656; GFX10-WGP-NEXT: s_endpgm 9657; 9658; GFX10-CU-LABEL: global_wavefront_one_as_unordered_load: 9659; GFX10-CU: ; %bb.0: ; %entry 9660; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 9661; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 9662; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 9663; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 9664; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] 
9665; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 9666; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 9667; GFX10-CU-NEXT: s_endpgm 9668; 9669; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_unordered_load: 9670; SKIP-CACHE-INV: ; %bb.0: ; %entry 9671; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 9672; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 9673; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 9674; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 9675; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 9676; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 9677; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 9678; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 9679; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9680; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 9681; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 9682; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 9683; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 9684; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 9685; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 9686; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 9687; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 9688; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 9689; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 9690; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9691; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 9692; SKIP-CACHE-INV-NEXT: s_endpgm 9693; 9694; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_unordered_load: 9695; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 9696; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9697; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 9698; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 9699; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9700; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 9701; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9702; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9703; 
GFX90A-NOTTGSPLIT-NEXT: s_endpgm 9704; 9705; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_unordered_load: 9706; GFX90A-TGSPLIT: ; %bb.0: ; %entry 9707; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9708; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 9709; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 9710; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9711; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 9712; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9713; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9714; GFX90A-TGSPLIT-NEXT: s_endpgm 9715; 9716; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_unordered_load: 9717; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 9718; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9719; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 9720; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 9721; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9722; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 9723; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9724; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9725; GFX940-NOTTGSPLIT-NEXT: s_endpgm 9726; 9727; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_unordered_load: 9728; GFX940-TGSPLIT: ; %bb.0: ; %entry 9729; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9730; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 9731; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 9732; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9733; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 9734; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9735; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9736; GFX940-TGSPLIT-NEXT: s_endpgm 9737; 9738; GFX11-WGP-LABEL: global_wavefront_one_as_unordered_load: 9739; GFX11-WGP: ; %bb.0: ; %entry 9740; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 9741; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 9742; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 9743; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 9744; 
GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 9745; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 9746; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9747; GFX11-WGP-NEXT: s_endpgm 9748; 9749; GFX11-CU-LABEL: global_wavefront_one_as_unordered_load: 9750; GFX11-CU: ; %bb.0: ; %entry 9751; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 9752; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 9753; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 9754; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9755; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] 9756; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 9757; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9758; GFX11-CU-NEXT: s_endpgm 9759; 9760; GFX12-WGP-LABEL: global_wavefront_one_as_unordered_load: 9761; GFX12-WGP: ; %bb.0: ; %entry 9762; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 9763; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 9764; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 9765; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9766; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 9767; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 9768; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9769; GFX12-WGP-NEXT: s_endpgm 9770; 9771; GFX12-CU-LABEL: global_wavefront_one_as_unordered_load: 9772; GFX12-CU: ; %bb.0: ; %entry 9773; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 9774; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 9775; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 9776; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9777; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] 9778; GFX12-CU-NEXT: s_wait_loadcnt 0x0 9779; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9780; GFX12-CU-NEXT: s_endpgm 9781 ptr addrspace(1) %in, ptr addrspace(1) %out) { 9782entry: 9783 %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") unordered, align 4 9784 store i32 %val, ptr addrspace(1) %out 9785 ret void 9786} 9787 9788define amdgpu_kernel void @global_wavefront_one_as_monotonic_load( 9789; GFX6-LABEL: global_wavefront_one_as_monotonic_load: 9790; GFX6: ; %bb.0: ; %entry 9791; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 
9792; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 9793; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 9794; GFX6-NEXT: s_waitcnt lgkmcnt(0) 9795; GFX6-NEXT: s_mov_b32 s6, s9 9796; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 9797; GFX6-NEXT: s_mov_b32 s12, 0x100f000 9798; GFX6-NEXT: s_mov_b32 s13, -1 9799; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 9800; GFX6-NEXT: s_mov_b32 s9, s6 9801; GFX6-NEXT: s_mov_b32 s10, s13 9802; GFX6-NEXT: s_mov_b32 s11, s12 9803; GFX6-NEXT: s_mov_b32 s14, s5 9804; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 9805; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9806; GFX6-NEXT: s_mov_b32 s5, s14 9807; GFX6-NEXT: s_mov_b32 s6, s13 9808; GFX6-NEXT: s_mov_b32 s7, s12 9809; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 9810; GFX6-NEXT: s_waitcnt vmcnt(0) 9811; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 9812; GFX6-NEXT: s_endpgm 9813; 9814; GFX7-LABEL: global_wavefront_one_as_monotonic_load: 9815; GFX7: ; %bb.0: ; %entry 9816; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 9817; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 9818; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9819; GFX7-NEXT: v_mov_b32_e32 v0, s6 9820; GFX7-NEXT: v_mov_b32_e32 v1, s7 9821; GFX7-NEXT: flat_load_dword v2, v[0:1] 9822; GFX7-NEXT: v_mov_b32_e32 v0, s4 9823; GFX7-NEXT: v_mov_b32_e32 v1, s5 9824; GFX7-NEXT: s_waitcnt vmcnt(0) 9825; GFX7-NEXT: flat_store_dword v[0:1], v2 9826; GFX7-NEXT: s_endpgm 9827; 9828; GFX10-WGP-LABEL: global_wavefront_one_as_monotonic_load: 9829; GFX10-WGP: ; %bb.0: ; %entry 9830; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 9831; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 9832; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 9833; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9834; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] 9835; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 9836; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 9837; GFX10-WGP-NEXT: s_endpgm 
9838; 9839; GFX10-CU-LABEL: global_wavefront_one_as_monotonic_load: 9840; GFX10-CU: ; %bb.0: ; %entry 9841; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 9842; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 9843; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 9844; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 9845; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] 9846; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 9847; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 9848; GFX10-CU-NEXT: s_endpgm 9849; 9850; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_load: 9851; SKIP-CACHE-INV: ; %bb.0: ; %entry 9852; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 9853; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 9854; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 9855; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 9856; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 9857; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 9858; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 9859; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 9860; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9861; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 9862; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 9863; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 9864; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 9865; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 9866; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 9867; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 9868; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 9869; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 9870; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 9871; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 9872; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 9873; SKIP-CACHE-INV-NEXT: s_endpgm 9874; 9875; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_load: 9876; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 9877; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9878; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 9879; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 9880; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9881; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 9882; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9883; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9884; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 9885; 9886; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_load: 9887; GFX90A-TGSPLIT: ; %bb.0: ; %entry 9888; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9889; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 9890; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 9891; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9892; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 9893; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9894; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 9895; GFX90A-TGSPLIT-NEXT: s_endpgm 9896; 9897; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_load: 9898; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 9899; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9900; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 9901; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 9902; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9903; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 9904; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 9905; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 9906; GFX940-NOTTGSPLIT-NEXT: s_endpgm 9907; 9908; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_load: 9909; GFX940-TGSPLIT: ; %bb.0: ; %entry 9910; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 9911; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 9912; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 9913; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9914; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 9915; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9916; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] 
sc0 sc1 9917; GFX940-TGSPLIT-NEXT: s_endpgm 9918; 9919; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_load: 9920; GFX11-WGP: ; %bb.0: ; %entry 9921; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 9922; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 9923; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 9924; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 9925; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 9926; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 9927; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9928; GFX11-WGP-NEXT: s_endpgm 9929; 9930; GFX11-CU-LABEL: global_wavefront_one_as_monotonic_load: 9931; GFX11-CU: ; %bb.0: ; %entry 9932; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 9933; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 9934; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 9935; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9936; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] 9937; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 9938; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9939; GFX11-CU-NEXT: s_endpgm 9940; 9941; GFX12-WGP-LABEL: global_wavefront_one_as_monotonic_load: 9942; GFX12-WGP: ; %bb.0: ; %entry 9943; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 9944; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 9945; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 9946; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9947; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 9948; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 9949; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 9950; GFX12-WGP-NEXT: s_endpgm 9951; 9952; GFX12-CU-LABEL: global_wavefront_one_as_monotonic_load: 9953; GFX12-CU: ; %bb.0: ; %entry 9954; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 9955; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 9956; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 9957; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9958; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] 9959; GFX12-CU-NEXT: s_wait_loadcnt 0x0 9960; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 9961; GFX12-CU-NEXT: s_endpgm 9962 ptr addrspace(1) %in, ptr addrspace(1) %out) { 9963entry: 9964 %val = load atomic 
i32, ptr addrspace(1) %in syncscope("wavefront-one-as") monotonic, align 4 9965 store i32 %val, ptr addrspace(1) %out 9966 ret void 9967} 9968
; Test kernel @global_wavefront_one_as_acquire_load
; IR under test: an atomic i32 load from %in (addrspace(1)) with
; syncscope("wavefront-one-as") and acquire ordering, followed by a plain
; (non-atomic) i32 store of the loaded value to %out.
; The per-target comment lines that sit between the opening parenthesis of the
; signature and its parameter list are the FileCheck expectations for this
; kernel; the file header states they are autogenerated by
; utils/update_llc_test_checks.py.
9969define amdgpu_kernel void @global_wavefront_one_as_acquire_load( 9970; GFX6-LABEL: global_wavefront_one_as_acquire_load: 9971; GFX6: ; %bb.0: ; %entry 9972; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 9973; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 9974; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 9975; GFX6-NEXT: s_waitcnt lgkmcnt(0) 9976; GFX6-NEXT: s_mov_b32 s6, s9 9977; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 9978; GFX6-NEXT: s_mov_b32 s12, 0x100f000 9979; GFX6-NEXT: s_mov_b32 s13, -1 9980; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 9981; GFX6-NEXT: s_mov_b32 s9, s6 9982; GFX6-NEXT: s_mov_b32 s10, s13 9983; GFX6-NEXT: s_mov_b32 s11, s12 9984; GFX6-NEXT: s_mov_b32 s14, s5 9985; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 9986; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 9987; GFX6-NEXT: s_mov_b32 s5, s14 9988; GFX6-NEXT: s_mov_b32 s6, s13 9989; GFX6-NEXT: s_mov_b32 s7, s12 9990; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 9991; GFX6-NEXT: s_waitcnt vmcnt(0) 9992; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 9993; GFX6-NEXT: s_endpgm 9994; 9995; GFX7-LABEL: global_wavefront_one_as_acquire_load: 9996; GFX7: ; %bb.0: ; %entry 9997; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 9998; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 9999; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10000; GFX7-NEXT: v_mov_b32_e32 v0, s6 10001; GFX7-NEXT: v_mov_b32_e32 v1, s7 10002; GFX7-NEXT: flat_load_dword v2, v[0:1] 10003; GFX7-NEXT: v_mov_b32_e32 v0, s4 10004; GFX7-NEXT: v_mov_b32_e32 v1, s5 10005; GFX7-NEXT: s_waitcnt vmcnt(0) 10006; GFX7-NEXT: flat_store_dword v[0:1], v2 10007; GFX7-NEXT: s_endpgm 10008; 10009; GFX10-WGP-LABEL: global_wavefront_one_as_acquire_load: 10010; GFX10-WGP: ; %bb.0: ; %entry 10011; GFX10-WGP-NEXT: v_mov_b32_e32
v0, 0 10012; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10013; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10014; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10015; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] 10016; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 10017; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10018; GFX10-WGP-NEXT: s_endpgm 10019; 10020; GFX10-CU-LABEL: global_wavefront_one_as_acquire_load: 10021; GFX10-CU: ; %bb.0: ; %entry 10022; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10023; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10024; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10025; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10026; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] 10027; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 10028; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10029; GFX10-CU-NEXT: s_endpgm 10030; 10031; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_load: 10032; SKIP-CACHE-INV: ; %bb.0: ; %entry 10033; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 10034; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 10035; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 10036; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10037; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 10038; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10039; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 10040; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 10041; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10042; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 10043; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 10044; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 10045; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 10046; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10047; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10048; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 10049; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 10050; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 10051; 
SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 10052; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10053; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10054; SKIP-CACHE-INV-NEXT: s_endpgm 10055; 10056; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_load: 10057; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10058; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10059; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10060; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10061; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10062; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 10063; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10064; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10065; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10066; 10067; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acquire_load: 10068; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10069; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10070; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10071; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10072; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10073; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 10074; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10075; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10076; GFX90A-TGSPLIT-NEXT: s_endpgm 10077; 10078; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_load: 10079; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10080; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10081; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 10082; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10083; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10084; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 10085; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10086; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10087; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10088; 10089; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_load: 10090; 
GFX940-TGSPLIT: ; %bb.0: ; %entry 10091; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10092; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 10093; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10094; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10095; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 10096; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10097; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10098; GFX940-TGSPLIT-NEXT: s_endpgm 10099; 10100; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_load: 10101; GFX11-WGP: ; %bb.0: ; %entry 10102; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10103; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10104; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10105; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10106; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 10107; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 10108; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10109; GFX11-WGP-NEXT: s_endpgm 10110; 10111; GFX11-CU-LABEL: global_wavefront_one_as_acquire_load: 10112; GFX11-CU: ; %bb.0: ; %entry 10113; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10114; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10115; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10116; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10117; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] 10118; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 10119; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10120; GFX11-CU-NEXT: s_endpgm 10121; 10122; GFX12-WGP-LABEL: global_wavefront_one_as_acquire_load: 10123; GFX12-WGP: ; %bb.0: ; %entry 10124; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10125; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10126; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10127; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10128; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 10129; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 10130; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10131; GFX12-WGP-NEXT: s_endpgm 10132; 10133; GFX12-CU-LABEL: global_wavefront_one_as_acquire_load: 10134; GFX12-CU: ; 
%bb.0: ; %entry 10135; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10136; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10137; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10138; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10139; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] 10140; GFX12-CU-NEXT: s_wait_loadcnt 0x0 10141; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10142; GFX12-CU-NEXT: s_endpgm 10143 ptr addrspace(1) %in, ptr addrspace(1) %out) { 10144entry: 10145 %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") acquire, align 4 10146 store i32 %val, ptr addrspace(1) %out 10147 ret void 10148} 10149
; Test kernel @global_wavefront_one_as_seq_cst_load
; IR under test: an atomic i32 load from %in (addrspace(1)) with
; syncscope("wavefront-one-as") and seq_cst ordering, followed by a plain
; (non-atomic) i32 store of the loaded value to %out.
; The per-target comment lines that sit between the opening parenthesis of the
; signature and its parameter list are the FileCheck expectations for this
; kernel; the file header states they are autogenerated by
; utils/update_llc_test_checks.py.
10150define amdgpu_kernel void @global_wavefront_one_as_seq_cst_load( 10151; GFX6-LABEL: global_wavefront_one_as_seq_cst_load: 10152; GFX6: ; %bb.0: ; %entry 10153; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 10154; GFX6-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 10155; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 10156; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10157; GFX6-NEXT: s_mov_b32 s6, s9 10158; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 10159; GFX6-NEXT: s_mov_b32 s12, 0x100f000 10160; GFX6-NEXT: s_mov_b32 s13, -1 10161; GFX6-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 10162; GFX6-NEXT: s_mov_b32 s9, s6 10163; GFX6-NEXT: s_mov_b32 s10, s13 10164; GFX6-NEXT: s_mov_b32 s11, s12 10165; GFX6-NEXT: s_mov_b32 s14, s5 10166; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10167; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10168; GFX6-NEXT: s_mov_b32 s5, s14 10169; GFX6-NEXT: s_mov_b32 s6, s13 10170; GFX6-NEXT: s_mov_b32 s7, s12 10171; GFX6-NEXT: buffer_load_dword v0, off, s[8:11], 0 10172; GFX6-NEXT: s_waitcnt vmcnt(0) 10173; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10174; GFX6-NEXT: s_endpgm 10175; 10176; GFX7-LABEL: global_wavefront_one_as_seq_cst_load: 10177; GFX7: ; %bb.0: ; %entry 10178; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10179; GFX7-NEXT: s_load_dwordx2 s[4:5],
s[8:9], 0x2 10180; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10181; GFX7-NEXT: v_mov_b32_e32 v0, s6 10182; GFX7-NEXT: v_mov_b32_e32 v1, s7 10183; GFX7-NEXT: flat_load_dword v2, v[0:1] 10184; GFX7-NEXT: v_mov_b32_e32 v0, s4 10185; GFX7-NEXT: v_mov_b32_e32 v1, s5 10186; GFX7-NEXT: s_waitcnt vmcnt(0) 10187; GFX7-NEXT: flat_store_dword v[0:1], v2 10188; GFX7-NEXT: s_endpgm 10189; 10190; GFX10-WGP-LABEL: global_wavefront_one_as_seq_cst_load: 10191; GFX10-WGP: ; %bb.0: ; %entry 10192; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10193; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10194; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10195; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10196; GFX10-WGP-NEXT: global_load_dword v1, v0, s[6:7] 10197; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 10198; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10199; GFX10-WGP-NEXT: s_endpgm 10200; 10201; GFX10-CU-LABEL: global_wavefront_one_as_seq_cst_load: 10202; GFX10-CU: ; %bb.0: ; %entry 10203; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10204; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10205; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10206; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10207; GFX10-CU-NEXT: global_load_dword v1, v0, s[6:7] 10208; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 10209; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10210; GFX10-CU-NEXT: s_endpgm 10211; 10212; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_load: 10213; SKIP-CACHE-INV: ; %bb.0: ; %entry 10214; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 10215; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 10216; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 10217; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10218; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s5 10219; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10220; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, 0xf000 10221; SKIP-CACHE-INV-NEXT: s_mov_b32 s9, -1 10222; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 
10223; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s2 10224; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 10225; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 10226; SKIP-CACHE-INV-NEXT: s_mov_b32 s10, s1 10227; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10228; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10229; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s10 10230; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s9 10231; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s8 10232; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 10233; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 10234; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10235; SKIP-CACHE-INV-NEXT: s_endpgm 10236; 10237; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_load: 10238; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10239; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10240; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10241; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10242; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10243; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 10244; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10245; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10246; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10247; 10248; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_load: 10249; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10250; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10251; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10252; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10253; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10254; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[6:7] 10255; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10256; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10257; GFX90A-TGSPLIT-NEXT: s_endpgm 10258; 10259; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_load: 10260; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10261; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 
10262; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 10263; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10264; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10265; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 10266; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 10267; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10268; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10269; 10270; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_load: 10271; GFX940-TGSPLIT: ; %bb.0: ; %entry 10272; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10273; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 10274; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10275; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10276; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] 10277; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10278; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10279; GFX940-TGSPLIT-NEXT: s_endpgm 10280; 10281; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_load: 10282; GFX11-WGP: ; %bb.0: ; %entry 10283; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10284; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10285; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10286; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10287; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 10288; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 10289; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10290; GFX11-WGP-NEXT: s_endpgm 10291; 10292; GFX11-CU-LABEL: global_wavefront_one_as_seq_cst_load: 10293; GFX11-CU: ; %bb.0: ; %entry 10294; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10295; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10296; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10297; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10298; GFX11-CU-NEXT: global_load_b32 v1, v0, s[2:3] 10299; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 10300; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10301; GFX11-CU-NEXT: s_endpgm 10302; 10303; GFX12-WGP-LABEL: global_wavefront_one_as_seq_cst_load: 10304; 
GFX12-WGP: ; %bb.0: ; %entry 10305; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10306; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10307; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10308; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10309; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] 10310; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 10311; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10312; GFX12-WGP-NEXT: s_endpgm 10313; 10314; GFX12-CU-LABEL: global_wavefront_one_as_seq_cst_load: 10315; GFX12-CU: ; %bb.0: ; %entry 10316; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10317; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 10318; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10319; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10320; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] 10321; GFX12-CU-NEXT: s_wait_loadcnt 0x0 10322; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10323; GFX12-CU-NEXT: s_endpgm 10324 ptr addrspace(1) %in, ptr addrspace(1) %out) { 10325entry: 10326 %val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") seq_cst, align 4 10327 store i32 %val, ptr addrspace(1) %out 10328 ret void 10329} 10330
; Test kernel @global_wavefront_one_as_unordered_store
; IR under test: an atomic i32 store of the kernel argument %in to %out
; (addrspace(1)) with syncscope("wavefront-one-as") and unordered ordering,
; followed by ret void. Unlike the *_load kernels, %in is an i32 value here,
; not a pointer.
; The per-target comment lines that sit between the opening parenthesis of the
; signature and its parameter list are the FileCheck expectations for this
; kernel; the file header states they are autogenerated by
; utils/update_llc_test_checks.py.
10331define amdgpu_kernel void @global_wavefront_one_as_unordered_store( 10332; GFX6-LABEL: global_wavefront_one_as_unordered_store: 10333; GFX6: ; %bb.0: ; %entry 10334; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 10335; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 10336; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 10337; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10338; GFX6-NEXT: s_mov_b32 s11, s5 10339; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10340; GFX6-NEXT: s_mov_b32 s9, 0x100f000 10341; GFX6-NEXT: s_mov_b32 s10, -1 10342; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10343; GFX6-NEXT: s_mov_b32 s5, s11 10344; GFX6-NEXT: s_mov_b32 s6, s10 10345; GFX6-NEXT: s_mov_b32 s7, s9 10346; GFX6-NEXT: v_mov_b32_e32 v0, s8 10347; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10348; GFX6-NEXT: s_endpgm 10349; 10350; GFX7-LABEL:
global_wavefront_one_as_unordered_store: 10351; GFX7: ; %bb.0: ; %entry 10352; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 10353; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 10354; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10355; GFX7-NEXT: v_mov_b32_e32 v0, s6 10356; GFX7-NEXT: v_mov_b32_e32 v1, s7 10357; GFX7-NEXT: v_mov_b32_e32 v2, s4 10358; GFX7-NEXT: flat_store_dword v[0:1], v2 10359; GFX7-NEXT: s_endpgm 10360; 10361; GFX10-WGP-LABEL: global_wavefront_one_as_unordered_store: 10362; GFX10-WGP: ; %bb.0: ; %entry 10363; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 10364; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10365; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10366; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10367; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 10368; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10369; GFX10-WGP-NEXT: s_endpgm 10370; 10371; GFX10-CU-LABEL: global_wavefront_one_as_unordered_store: 10372; GFX10-CU: ; %bb.0: ; %entry 10373; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 10374; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10375; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10376; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10377; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 10378; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10379; GFX10-CU-NEXT: s_endpgm 10380; 10381; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_unordered_store: 10382; SKIP-CACHE-INV: ; %bb.0: ; %entry 10383; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 10384; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 10385; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 10386; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10387; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 10388; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10389; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 10390; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 10391; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10392; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 10393; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s2, s6 10394; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 10395; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 10396; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10397; SKIP-CACHE-INV-NEXT: s_endpgm 10398; 10399; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_unordered_store: 10400; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10401; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 10402; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10403; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10404; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10405; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10406; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10407; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10408; 10409; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_unordered_store: 10410; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10411; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 10412; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10413; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10414; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10415; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10416; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10417; GFX90A-TGSPLIT-NEXT: s_endpgm 10418; 10419; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_unordered_store: 10420; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10421; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 10422; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10423; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10424; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10425; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10426; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10427; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10428; 10429; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_unordered_store: 10430; GFX940-TGSPLIT: ; %bb.0: ; %entry 10431; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 10432; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10433; GFX940-TGSPLIT-NEXT: 
v_mov_b32_e32 v0, 0 10434; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10435; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10436; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10437; GFX940-TGSPLIT-NEXT: s_endpgm 10438; 10439; GFX11-WGP-LABEL: global_wavefront_one_as_unordered_store: 10440; GFX11-WGP: ; %bb.0: ; %entry 10441; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 10442; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10443; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10444; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10445; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 10446; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10447; GFX11-WGP-NEXT: s_endpgm 10448; 10449; GFX11-CU-LABEL: global_wavefront_one_as_unordered_store: 10450; GFX11-CU: ; %bb.0: ; %entry 10451; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 10452; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10453; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10454; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10455; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 10456; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10457; GFX11-CU-NEXT: s_endpgm 10458; 10459; GFX12-WGP-LABEL: global_wavefront_one_as_unordered_store: 10460; GFX12-WGP: ; %bb.0: ; %entry 10461; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 10462; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10463; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10464; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10465; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 10466; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10467; GFX12-WGP-NEXT: s_endpgm 10468; 10469; GFX12-CU-LABEL: global_wavefront_one_as_unordered_store: 10470; GFX12-CU: ; %bb.0: ; %entry 10471; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 10472; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10473; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10474; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10475; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 10476; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10477; GFX12-CU-NEXT: s_endpgm 10478 i32 %in, ptr addrspace(1) %out) { 10479entry: 10480 store atomic i32 %in, 
ptr addrspace(1) %out syncscope("wavefront-one-as") unordered, align 4 10481 ret void 10482} 10483
; Test kernel @global_wavefront_one_as_monotonic_store
; IR under test: an atomic i32 store of the kernel argument %in to %out
; (addrspace(1)) with syncscope("wavefront-one-as") and monotonic ordering,
; followed by ret void. %in is an i32 value here, not a pointer.
; The per-target comment lines that sit between the opening parenthesis of the
; signature and its parameter list are the FileCheck expectations for this
; kernel; the file header states they are autogenerated by
; utils/update_llc_test_checks.py.
10484define amdgpu_kernel void @global_wavefront_one_as_monotonic_store( 10485; GFX6-LABEL: global_wavefront_one_as_monotonic_store: 10486; GFX6: ; %bb.0: ; %entry 10487; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 10488; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 10489; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 10490; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10491; GFX6-NEXT: s_mov_b32 s11, s5 10492; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10493; GFX6-NEXT: s_mov_b32 s9, 0x100f000 10494; GFX6-NEXT: s_mov_b32 s10, -1 10495; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10496; GFX6-NEXT: s_mov_b32 s5, s11 10497; GFX6-NEXT: s_mov_b32 s6, s10 10498; GFX6-NEXT: s_mov_b32 s7, s9 10499; GFX6-NEXT: v_mov_b32_e32 v0, s8 10500; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10501; GFX6-NEXT: s_endpgm 10502; 10503; GFX7-LABEL: global_wavefront_one_as_monotonic_store: 10504; GFX7: ; %bb.0: ; %entry 10505; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 10506; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 10507; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10508; GFX7-NEXT: v_mov_b32_e32 v0, s6 10509; GFX7-NEXT: v_mov_b32_e32 v1, s7 10510; GFX7-NEXT: v_mov_b32_e32 v2, s4 10511; GFX7-NEXT: flat_store_dword v[0:1], v2 10512; GFX7-NEXT: s_endpgm 10513; 10514; GFX10-WGP-LABEL: global_wavefront_one_as_monotonic_store: 10515; GFX10-WGP: ; %bb.0: ; %entry 10516; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 10517; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10518; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10519; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10520; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 10521; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10522; GFX10-WGP-NEXT: s_endpgm 10523; 10524; GFX10-CU-LABEL: global_wavefront_one_as_monotonic_store: 10525; GFX10-CU: ; %bb.0: ; %entry 10526; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 10527; GFX10-CU-NEXT: s_load_dwordx2
s[4:5], s[8:9], 0x8 10528; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10529; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10530; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 10531; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10532; GFX10-CU-NEXT: s_endpgm 10533; 10534; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_store: 10535; SKIP-CACHE-INV: ; %bb.0: ; %entry 10536; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 10537; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 10538; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 10539; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10540; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 10541; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10542; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 10543; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 10544; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10545; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 10546; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 10547; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 10548; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 10549; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10550; SKIP-CACHE-INV-NEXT: s_endpgm 10551; 10552; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_store: 10553; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10554; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 10555; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10556; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10557; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10558; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10559; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10560; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10561; 10562; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_store: 10563; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10564; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 10565; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10566; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10567; GFX90A-TGSPLIT-NEXT: 
s_waitcnt lgkmcnt(0) 10568; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10569; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10570; GFX90A-TGSPLIT-NEXT: s_endpgm 10571; 10572; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_store: 10573; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10574; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 10575; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10576; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10577; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10578; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10579; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10580; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10581; 10582; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_store: 10583; GFX940-TGSPLIT: ; %bb.0: ; %entry 10584; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 10585; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10586; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10587; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10588; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10589; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10590; GFX940-TGSPLIT-NEXT: s_endpgm 10591; 10592; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_store: 10593; GFX11-WGP: ; %bb.0: ; %entry 10594; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 10595; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10596; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10597; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10598; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 10599; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10600; GFX11-WGP-NEXT: s_endpgm 10601; 10602; GFX11-CU-LABEL: global_wavefront_one_as_monotonic_store: 10603; GFX11-CU: ; %bb.0: ; %entry 10604; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 10605; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10606; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10607; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10608; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 10609; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 
10610; GFX11-CU-NEXT: s_endpgm 10611; 10612; GFX12-WGP-LABEL: global_wavefront_one_as_monotonic_store: 10613; GFX12-WGP: ; %bb.0: ; %entry 10614; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 10615; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10616; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10617; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10618; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 10619; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10620; GFX12-WGP-NEXT: s_endpgm 10621; 10622; GFX12-CU-LABEL: global_wavefront_one_as_monotonic_store: 10623; GFX12-CU: ; %bb.0: ; %entry 10624; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 10625; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10626; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10627; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10628; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 10629; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10630; GFX12-CU-NEXT: s_endpgm 10631 i32 %in, ptr addrspace(1) %out) { 10632entry: 10633 store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") monotonic, align 4 10634 ret void 10635} 10636 10637define amdgpu_kernel void @global_wavefront_one_as_release_store( 10638; GFX6-LABEL: global_wavefront_one_as_release_store: 10639; GFX6: ; %bb.0: ; %entry 10640; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 10641; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 10642; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 10643; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10644; GFX6-NEXT: s_mov_b32 s11, s5 10645; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10646; GFX6-NEXT: s_mov_b32 s9, 0x100f000 10647; GFX6-NEXT: s_mov_b32 s10, -1 10648; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10649; GFX6-NEXT: s_mov_b32 s5, s11 10650; GFX6-NEXT: s_mov_b32 s6, s10 10651; GFX6-NEXT: s_mov_b32 s7, s9 10652; GFX6-NEXT: v_mov_b32_e32 v0, s8 10653; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10654; GFX6-NEXT: s_endpgm 10655; 10656; GFX7-LABEL: global_wavefront_one_as_release_store: 10657; GFX7: ; %bb.0: ; %entry 10658; 
GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 10659; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 10660; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10661; GFX7-NEXT: v_mov_b32_e32 v0, s6 10662; GFX7-NEXT: v_mov_b32_e32 v1, s7 10663; GFX7-NEXT: v_mov_b32_e32 v2, s4 10664; GFX7-NEXT: flat_store_dword v[0:1], v2 10665; GFX7-NEXT: s_endpgm 10666; 10667; GFX10-WGP-LABEL: global_wavefront_one_as_release_store: 10668; GFX10-WGP: ; %bb.0: ; %entry 10669; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 10670; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10671; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10672; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10673; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 10674; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10675; GFX10-WGP-NEXT: s_endpgm 10676; 10677; GFX10-CU-LABEL: global_wavefront_one_as_release_store: 10678; GFX10-CU: ; %bb.0: ; %entry 10679; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 10680; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10681; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10682; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10683; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 10684; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10685; GFX10-CU-NEXT: s_endpgm 10686; 10687; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_store: 10688; SKIP-CACHE-INV: ; %bb.0: ; %entry 10689; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 10690; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 10691; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 10692; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10693; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 10694; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10695; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 10696; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 10697; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10698; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 10699; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 10700; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 10701; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s4 10702; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10703; SKIP-CACHE-INV-NEXT: s_endpgm 10704; 10705; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_store: 10706; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10707; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 10708; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10709; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10710; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10711; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10712; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10713; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10714; 10715; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_release_store: 10716; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10717; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 10718; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10719; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10720; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10721; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10722; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10723; GFX90A-TGSPLIT-NEXT: s_endpgm 10724; 10725; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_store: 10726; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10727; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 10728; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10729; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10730; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10731; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10732; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10733; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10734; 10735; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_store: 10736; GFX940-TGSPLIT: ; %bb.0: ; %entry 10737; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 10738; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10739; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10740; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10741; GFX940-TGSPLIT-NEXT: 
v_mov_b32_e32 v1, s2 10742; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10743; GFX940-TGSPLIT-NEXT: s_endpgm 10744; 10745; GFX11-WGP-LABEL: global_wavefront_one_as_release_store: 10746; GFX11-WGP: ; %bb.0: ; %entry 10747; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 10748; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10749; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10750; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10751; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 10752; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10753; GFX11-WGP-NEXT: s_endpgm 10754; 10755; GFX11-CU-LABEL: global_wavefront_one_as_release_store: 10756; GFX11-CU: ; %bb.0: ; %entry 10757; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 10758; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10759; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10760; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10761; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 10762; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10763; GFX11-CU-NEXT: s_endpgm 10764; 10765; GFX12-WGP-LABEL: global_wavefront_one_as_release_store: 10766; GFX12-WGP: ; %bb.0: ; %entry 10767; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 10768; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10769; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10770; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10771; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 10772; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10773; GFX12-WGP-NEXT: s_endpgm 10774; 10775; GFX12-CU-LABEL: global_wavefront_one_as_release_store: 10776; GFX12-CU: ; %bb.0: ; %entry 10777; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 10778; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10779; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10780; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10781; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 10782; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10783; GFX12-CU-NEXT: s_endpgm 10784 i32 %in, ptr addrspace(1) %out) { 10785entry: 10786 store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") release, align 4 10787 ret void 10788} 10789 
10790define amdgpu_kernel void @global_wavefront_one_as_seq_cst_store( 10791; GFX6-LABEL: global_wavefront_one_as_seq_cst_store: 10792; GFX6: ; %bb.0: ; %entry 10793; GFX6-NEXT: s_mov_b64 s[4:5], s[8:9] 10794; GFX6-NEXT: s_load_dword s8, s[4:5], 0x0 10795; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 10796; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10797; GFX6-NEXT: s_mov_b32 s11, s5 10798; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10799; GFX6-NEXT: s_mov_b32 s9, 0x100f000 10800; GFX6-NEXT: s_mov_b32 s10, -1 10801; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10802; GFX6-NEXT: s_mov_b32 s5, s11 10803; GFX6-NEXT: s_mov_b32 s6, s10 10804; GFX6-NEXT: s_mov_b32 s7, s9 10805; GFX6-NEXT: v_mov_b32_e32 v0, s8 10806; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 10807; GFX6-NEXT: s_endpgm 10808; 10809; GFX7-LABEL: global_wavefront_one_as_seq_cst_store: 10810; GFX7: ; %bb.0: ; %entry 10811; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 10812; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 10813; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10814; GFX7-NEXT: v_mov_b32_e32 v0, s6 10815; GFX7-NEXT: v_mov_b32_e32 v1, s7 10816; GFX7-NEXT: v_mov_b32_e32 v2, s4 10817; GFX7-NEXT: flat_store_dword v[0:1], v2 10818; GFX7-NEXT: s_endpgm 10819; 10820; GFX10-WGP-LABEL: global_wavefront_one_as_seq_cst_store: 10821; GFX10-WGP: ; %bb.0: ; %entry 10822; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 10823; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10824; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10825; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10826; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 10827; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 10828; GFX10-WGP-NEXT: s_endpgm 10829; 10830; GFX10-CU-LABEL: global_wavefront_one_as_seq_cst_store: 10831; GFX10-CU: ; %bb.0: ; %entry 10832; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 10833; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10834; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10835; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 
10836; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 10837; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 10838; GFX10-CU-NEXT: s_endpgm 10839; 10840; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_store: 10841; SKIP-CACHE-INV: ; %bb.0: ; %entry 10842; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 10843; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 10844; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 10845; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10846; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 10847; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10848; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 10849; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 10850; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 10851; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 10852; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 10853; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 10854; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 10855; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 10856; SKIP-CACHE-INV-NEXT: s_endpgm 10857; 10858; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_store: 10859; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10860; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 10861; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10862; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10863; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10864; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10865; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 10866; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10867; 10868; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_store: 10869; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10870; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x0 10871; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 10872; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10873; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10874; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 10875; GFX90A-TGSPLIT-NEXT: 
global_store_dword v0, v1, s[4:5] 10876; GFX90A-TGSPLIT-NEXT: s_endpgm 10877; 10878; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_store: 10879; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10880; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 10881; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10882; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10883; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10884; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10885; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10886; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10887; 10888; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_store: 10889; GFX940-TGSPLIT: ; %bb.0: ; %entry 10890; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 10891; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 10892; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 10893; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10894; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 10895; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 10896; GFX940-TGSPLIT-NEXT: s_endpgm 10897; 10898; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_store: 10899; GFX11-WGP: ; %bb.0: ; %entry 10900; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 10901; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10902; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 10903; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10904; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 10905; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10906; GFX11-WGP-NEXT: s_endpgm 10907; 10908; GFX11-CU-LABEL: global_wavefront_one_as_seq_cst_store: 10909; GFX11-CU: ; %bb.0: ; %entry 10910; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 10911; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10912; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 10913; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10914; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 10915; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10916; GFX11-CU-NEXT: s_endpgm 10917; 10918; GFX12-WGP-LABEL: global_wavefront_one_as_seq_cst_store: 10919; 
GFX12-WGP: ; %bb.0: ; %entry 10920; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 10921; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10922; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 10923; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10924; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 10925; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 10926; GFX12-WGP-NEXT: s_endpgm 10927; 10928; GFX12-CU-LABEL: global_wavefront_one_as_seq_cst_store: 10929; GFX12-CU: ; %bb.0: ; %entry 10930; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 10931; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 10932; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 10933; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10934; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 10935; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 10936; GFX12-CU-NEXT: s_endpgm 10937 i32 %in, ptr addrspace(1) %out) { 10938entry: 10939 store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") seq_cst, align 4 10940 ret void 10941} 10942 10943define amdgpu_kernel void @global_wavefront_one_as_monotonic_atomicrmw( 10944; GFX6-LABEL: global_wavefront_one_as_monotonic_atomicrmw: 10945; GFX6: ; %bb.0: ; %entry 10946; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10947; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 10948; GFX6-NEXT: s_waitcnt lgkmcnt(0) 10949; GFX6-NEXT: s_mov_b32 s11, s5 10950; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 10951; GFX6-NEXT: s_mov_b32 s9, 0x100f000 10952; GFX6-NEXT: s_mov_b32 s10, -1 10953; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 10954; GFX6-NEXT: s_mov_b32 s5, s11 10955; GFX6-NEXT: s_mov_b32 s6, s10 10956; GFX6-NEXT: s_mov_b32 s7, s9 10957; GFX6-NEXT: v_mov_b32_e32 v0, s8 10958; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 10959; GFX6-NEXT: s_endpgm 10960; 10961; GFX7-LABEL: global_wavefront_one_as_monotonic_atomicrmw: 10962; GFX7: ; %bb.0: ; %entry 10963; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 10964; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 10965; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10966; 
GFX7-NEXT: v_mov_b32_e32 v0, s6 10967; GFX7-NEXT: v_mov_b32_e32 v1, s7 10968; GFX7-NEXT: v_mov_b32_e32 v2, s4 10969; GFX7-NEXT: flat_atomic_swap v[0:1], v2 10970; GFX7-NEXT: s_endpgm 10971; 10972; GFX10-WGP-LABEL: global_wavefront_one_as_monotonic_atomicrmw: 10973; GFX10-WGP: ; %bb.0: ; %entry 10974; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 10975; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10976; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 10977; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10978; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 10979; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 10980; GFX10-WGP-NEXT: s_endpgm 10981; 10982; GFX10-CU-LABEL: global_wavefront_one_as_monotonic_atomicrmw: 10983; GFX10-CU: ; %bb.0: ; %entry 10984; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 10985; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10986; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 10987; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10988; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 10989; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 10990; GFX10-CU-NEXT: s_endpgm 10991; 10992; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_atomicrmw: 10993; SKIP-CACHE-INV: ; %bb.0: ; %entry 10994; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 10995; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 10996; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10997; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 10998; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 10999; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 11000; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 11001; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11002; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 11003; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 11004; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 11005; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 11006; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 11007; SKIP-CACHE-INV-NEXT: s_endpgm 11008; 11009; GFX90A-NOTTGSPLIT-LABEL: 
global_wavefront_one_as_monotonic_atomicrmw: 11010; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11011; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11012; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11013; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11014; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11015; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11016; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11017; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11018; 11019; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_atomicrmw: 11020; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11021; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11022; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11023; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11024; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11025; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11026; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11027; GFX90A-TGSPLIT-NEXT: s_endpgm 11028; 11029; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_atomicrmw: 11030; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11031; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11032; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11033; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11034; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11035; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11036; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 11037; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11038; 11039; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_atomicrmw: 11040; GFX940-TGSPLIT: ; %bb.0: ; %entry 11041; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11042; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11043; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11044; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11045; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11046; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 11047; GFX940-TGSPLIT-NEXT: s_endpgm 11048; 11049; GFX11-WGP-LABEL: 
global_wavefront_one_as_monotonic_atomicrmw: 11050; GFX11-WGP: ; %bb.0: ; %entry 11051; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11052; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11053; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11054; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11055; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 11056; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11057; GFX11-WGP-NEXT: s_endpgm 11058; 11059; GFX11-CU-LABEL: global_wavefront_one_as_monotonic_atomicrmw: 11060; GFX11-CU: ; %bb.0: ; %entry 11061; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11062; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11063; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11064; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11065; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 11066; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11067; GFX11-CU-NEXT: s_endpgm 11068; 11069; GFX12-WGP-LABEL: global_wavefront_one_as_monotonic_atomicrmw: 11070; GFX12-WGP: ; %bb.0: ; %entry 11071; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11072; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11073; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11074; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11075; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 11076; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11077; GFX12-WGP-NEXT: s_endpgm 11078; 11079; GFX12-CU-LABEL: global_wavefront_one_as_monotonic_atomicrmw: 11080; GFX12-CU: ; %bb.0: ; %entry 11081; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11082; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11083; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11084; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11085; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 11086; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11087; GFX12-CU-NEXT: s_endpgm 11088 ptr addrspace(1) %out, i32 %in) { 11089entry: 11090 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") monotonic 11091 ret void 11092} 11093 11094define amdgpu_kernel void @global_wavefront_one_as_acquire_atomicrmw( 11095; GFX6-LABEL: 
global_wavefront_one_as_acquire_atomicrmw: 11096; GFX6: ; %bb.0: ; %entry 11097; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11098; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 11099; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11100; GFX6-NEXT: s_mov_b32 s11, s5 11101; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11102; GFX6-NEXT: s_mov_b32 s9, 0x100f000 11103; GFX6-NEXT: s_mov_b32 s10, -1 11104; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11105; GFX6-NEXT: s_mov_b32 s5, s11 11106; GFX6-NEXT: s_mov_b32 s6, s10 11107; GFX6-NEXT: s_mov_b32 s7, s9 11108; GFX6-NEXT: v_mov_b32_e32 v0, s8 11109; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 11110; GFX6-NEXT: s_endpgm 11111; 11112; GFX7-LABEL: global_wavefront_one_as_acquire_atomicrmw: 11113; GFX7: ; %bb.0: ; %entry 11114; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11115; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 11116; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11117; GFX7-NEXT: v_mov_b32_e32 v0, s6 11118; GFX7-NEXT: v_mov_b32_e32 v1, s7 11119; GFX7-NEXT: v_mov_b32_e32 v2, s4 11120; GFX7-NEXT: flat_atomic_swap v[0:1], v2 11121; GFX7-NEXT: s_endpgm 11122; 11123; GFX10-WGP-LABEL: global_wavefront_one_as_acquire_atomicrmw: 11124; GFX10-WGP: ; %bb.0: ; %entry 11125; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11126; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11127; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 11128; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11129; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 11130; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 11131; GFX10-WGP-NEXT: s_endpgm 11132; 11133; GFX10-CU-LABEL: global_wavefront_one_as_acquire_atomicrmw: 11134; GFX10-CU: ; %bb.0: ; %entry 11135; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11136; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11137; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 11138; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11139; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 11140; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 11141; GFX10-CU-NEXT: 
s_endpgm 11142; 11143; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_atomicrmw: 11144; SKIP-CACHE-INV: ; %bb.0: ; %entry 11145; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11146; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 11147; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11148; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 11149; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11150; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 11151; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 11152; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11153; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 11154; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 11155; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 11156; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 11157; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 11158; SKIP-CACHE-INV-NEXT: s_endpgm 11159; 11160; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_atomicrmw: 11161; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11162; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11163; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11164; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11165; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11166; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11167; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11168; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11169; 11170; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acquire_atomicrmw: 11171; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11172; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11173; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11174; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11175; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11176; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11177; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11178; GFX90A-TGSPLIT-NEXT: s_endpgm 11179; 11180; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_atomicrmw: 11181; GFX940-NOTTGSPLIT: 
; %bb.0: ; %entry 11182; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11183; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11184; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11185; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11186; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11187; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 11188; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11189; 11190; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_atomicrmw: 11191; GFX940-TGSPLIT: ; %bb.0: ; %entry 11192; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11193; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11194; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11195; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11196; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11197; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 11198; GFX940-TGSPLIT-NEXT: s_endpgm 11199; 11200; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_atomicrmw: 11201; GFX11-WGP: ; %bb.0: ; %entry 11202; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11203; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11204; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11205; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11206; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 11207; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11208; GFX11-WGP-NEXT: s_endpgm 11209; 11210; GFX11-CU-LABEL: global_wavefront_one_as_acquire_atomicrmw: 11211; GFX11-CU: ; %bb.0: ; %entry 11212; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11213; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11214; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11215; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11216; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 11217; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11218; GFX11-CU-NEXT: s_endpgm 11219; 11220; GFX12-WGP-LABEL: global_wavefront_one_as_acquire_atomicrmw: 11221; GFX12-WGP: ; %bb.0: ; %entry 11222; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11223; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11224; GFX12-WGP-NEXT: s_load_b32 
s2, s[4:5], 0x8 11225; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11226; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 11227; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11228; GFX12-WGP-NEXT: s_endpgm 11229; 11230; GFX12-CU-LABEL: global_wavefront_one_as_acquire_atomicrmw: 11231; GFX12-CU: ; %bb.0: ; %entry 11232; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11233; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11234; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11235; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11236; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 11237; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11238; GFX12-CU-NEXT: s_endpgm 11239 ptr addrspace(1) %out, i32 %in) { 11240entry: 11241 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acquire 11242 ret void 11243} 11244 11245define amdgpu_kernel void @global_wavefront_one_as_release_atomicrmw( 11246; GFX6-LABEL: global_wavefront_one_as_release_atomicrmw: 11247; GFX6: ; %bb.0: ; %entry 11248; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11249; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 11250; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11251; GFX6-NEXT: s_mov_b32 s11, s5 11252; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11253; GFX6-NEXT: s_mov_b32 s9, 0x100f000 11254; GFX6-NEXT: s_mov_b32 s10, -1 11255; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11256; GFX6-NEXT: s_mov_b32 s5, s11 11257; GFX6-NEXT: s_mov_b32 s6, s10 11258; GFX6-NEXT: s_mov_b32 s7, s9 11259; GFX6-NEXT: v_mov_b32_e32 v0, s8 11260; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 11261; GFX6-NEXT: s_endpgm 11262; 11263; GFX7-LABEL: global_wavefront_one_as_release_atomicrmw: 11264; GFX7: ; %bb.0: ; %entry 11265; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11266; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 11267; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11268; GFX7-NEXT: v_mov_b32_e32 v0, s6 11269; GFX7-NEXT: v_mov_b32_e32 v1, s7 11270; GFX7-NEXT: v_mov_b32_e32 v2, s4 11271; GFX7-NEXT: flat_atomic_swap 
v[0:1], v2 11272; GFX7-NEXT: s_endpgm 11273; 11274; GFX10-WGP-LABEL: global_wavefront_one_as_release_atomicrmw: 11275; GFX10-WGP: ; %bb.0: ; %entry 11276; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11277; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11278; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 11279; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11280; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 11281; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 11282; GFX10-WGP-NEXT: s_endpgm 11283; 11284; GFX10-CU-LABEL: global_wavefront_one_as_release_atomicrmw: 11285; GFX10-CU: ; %bb.0: ; %entry 11286; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11287; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11288; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 11289; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11290; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 11291; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 11292; GFX10-CU-NEXT: s_endpgm 11293; 11294; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_atomicrmw: 11295; SKIP-CACHE-INV: ; %bb.0: ; %entry 11296; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11297; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 11298; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11299; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 11300; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11301; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 11302; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 11303; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11304; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 11305; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 11306; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 11307; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 11308; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 11309; SKIP-CACHE-INV-NEXT: s_endpgm 11310; 11311; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_atomicrmw: 11312; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11313; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11314; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11315; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11316; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11317; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11318; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11319; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11320; 11321; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_release_atomicrmw: 11322; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11323; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11324; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11325; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11326; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11327; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11328; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11329; GFX90A-TGSPLIT-NEXT: s_endpgm 11330; 11331; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_atomicrmw: 11332; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11333; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11334; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11335; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11336; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11337; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11338; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 11339; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11340; 11341; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_atomicrmw: 11342; GFX940-TGSPLIT: ; %bb.0: ; %entry 11343; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11344; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11345; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11346; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11347; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11348; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 11349; GFX940-TGSPLIT-NEXT: s_endpgm 11350; 11351; GFX11-WGP-LABEL: global_wavefront_one_as_release_atomicrmw: 11352; GFX11-WGP: ; %bb.0: ; %entry 11353; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11354; GFX11-WGP-NEXT: s_load_b64 
s[0:1], s[4:5], 0x0 11355; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11356; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11357; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 11358; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11359; GFX11-WGP-NEXT: s_endpgm 11360; 11361; GFX11-CU-LABEL: global_wavefront_one_as_release_atomicrmw: 11362; GFX11-CU: ; %bb.0: ; %entry 11363; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11364; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11365; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11366; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11367; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 11368; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11369; GFX11-CU-NEXT: s_endpgm 11370; 11371; GFX12-WGP-LABEL: global_wavefront_one_as_release_atomicrmw: 11372; GFX12-WGP: ; %bb.0: ; %entry 11373; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11374; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11375; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11376; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11377; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 11378; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11379; GFX12-WGP-NEXT: s_endpgm 11380; 11381; GFX12-CU-LABEL: global_wavefront_one_as_release_atomicrmw: 11382; GFX12-CU: ; %bb.0: ; %entry 11383; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11384; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11385; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11386; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11387; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 11388; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11389; GFX12-CU-NEXT: s_endpgm 11390 ptr addrspace(1) %out, i32 %in) { 11391entry: 11392 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") release 11393 ret void 11394} 11395 11396define amdgpu_kernel void @global_wavefront_one_as_acq_rel_atomicrmw( 11397; GFX6-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: 11398; GFX6: ; %bb.0: ; %entry 11399; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11400; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 11401; 
GFX6-NEXT: s_waitcnt lgkmcnt(0) 11402; GFX6-NEXT: s_mov_b32 s11, s5 11403; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11404; GFX6-NEXT: s_mov_b32 s9, 0x100f000 11405; GFX6-NEXT: s_mov_b32 s10, -1 11406; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11407; GFX6-NEXT: s_mov_b32 s5, s11 11408; GFX6-NEXT: s_mov_b32 s6, s10 11409; GFX6-NEXT: s_mov_b32 s7, s9 11410; GFX6-NEXT: v_mov_b32_e32 v0, s8 11411; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 11412; GFX6-NEXT: s_endpgm 11413; 11414; GFX7-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: 11415; GFX7: ; %bb.0: ; %entry 11416; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11417; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 11418; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11419; GFX7-NEXT: v_mov_b32_e32 v0, s6 11420; GFX7-NEXT: v_mov_b32_e32 v1, s7 11421; GFX7-NEXT: v_mov_b32_e32 v2, s4 11422; GFX7-NEXT: flat_atomic_swap v[0:1], v2 11423; GFX7-NEXT: s_endpgm 11424; 11425; GFX10-WGP-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: 11426; GFX10-WGP: ; %bb.0: ; %entry 11427; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11428; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11429; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 11430; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11431; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 11432; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 11433; GFX10-WGP-NEXT: s_endpgm 11434; 11435; GFX10-CU-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: 11436; GFX10-CU: ; %bb.0: ; %entry 11437; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11438; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11439; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 11440; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11441; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 11442; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 11443; GFX10-CU-NEXT: s_endpgm 11444; 11445; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: 11446; SKIP-CACHE-INV: ; %bb.0: ; %entry 11447; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x0 11448; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 11449; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11450; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 11451; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11452; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 11453; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 11454; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11455; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 11456; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 11457; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 11458; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 11459; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 11460; SKIP-CACHE-INV-NEXT: s_endpgm 11461; 11462; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: 11463; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11464; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11465; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11466; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11467; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11468; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11469; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11470; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11471; 11472; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: 11473; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11474; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11475; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11476; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11477; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11478; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11479; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11480; GFX90A-TGSPLIT-NEXT: s_endpgm 11481; 11482; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: 11483; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11484; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11485; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11486; GFX940-NOTTGSPLIT-NEXT: s_load_dword 
s2, s[4:5], 0x8 11487; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11488; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11489; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 11490; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11491; 11492; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: 11493; GFX940-TGSPLIT: ; %bb.0: ; %entry 11494; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11495; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11496; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11497; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11498; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11499; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 11500; GFX940-TGSPLIT-NEXT: s_endpgm 11501; 11502; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: 11503; GFX11-WGP: ; %bb.0: ; %entry 11504; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11505; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11506; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11507; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11508; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 11509; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11510; GFX11-WGP-NEXT: s_endpgm 11511; 11512; GFX11-CU-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: 11513; GFX11-CU: ; %bb.0: ; %entry 11514; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11515; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11516; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11517; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11518; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 11519; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11520; GFX11-CU-NEXT: s_endpgm 11521; 11522; GFX12-WGP-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: 11523; GFX12-WGP: ; %bb.0: ; %entry 11524; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11525; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11526; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11527; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11528; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 11529; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11530; 
GFX12-WGP-NEXT: s_endpgm 11531; 11532; GFX12-CU-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: 11533; GFX12-CU: ; %bb.0: ; %entry 11534; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11535; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11536; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11537; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11538; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 11539; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11540; GFX12-CU-NEXT: s_endpgm 11541 ptr addrspace(1) %out, i32 %in) { 11542entry: 11543 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acq_rel 11544 ret void 11545} 11546 11547define amdgpu_kernel void @global_wavefront_one_as_seq_cst_atomicrmw( 11548; GFX6-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: 11549; GFX6: ; %bb.0: ; %entry 11550; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11551; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 11552; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11553; GFX6-NEXT: s_mov_b32 s11, s5 11554; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11555; GFX6-NEXT: s_mov_b32 s9, 0x100f000 11556; GFX6-NEXT: s_mov_b32 s10, -1 11557; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11558; GFX6-NEXT: s_mov_b32 s5, s11 11559; GFX6-NEXT: s_mov_b32 s6, s10 11560; GFX6-NEXT: s_mov_b32 s7, s9 11561; GFX6-NEXT: v_mov_b32_e32 v0, s8 11562; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 11563; GFX6-NEXT: s_endpgm 11564; 11565; GFX7-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: 11566; GFX7: ; %bb.0: ; %entry 11567; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11568; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 11569; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11570; GFX7-NEXT: v_mov_b32_e32 v0, s6 11571; GFX7-NEXT: v_mov_b32_e32 v1, s7 11572; GFX7-NEXT: v_mov_b32_e32 v2, s4 11573; GFX7-NEXT: flat_atomic_swap v[0:1], v2 11574; GFX7-NEXT: s_endpgm 11575; 11576; GFX10-WGP-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: 11577; GFX10-WGP: ; %bb.0: ; %entry 11578; GFX10-WGP-NEXT: 
v_mov_b32_e32 v0, 0 11579; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11580; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 11581; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11582; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 11583; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[4:5] 11584; GFX10-WGP-NEXT: s_endpgm 11585; 11586; GFX10-CU-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: 11587; GFX10-CU: ; %bb.0: ; %entry 11588; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11589; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11590; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 11591; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11592; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 11593; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[4:5] 11594; GFX10-CU-NEXT: s_endpgm 11595; 11596; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: 11597; SKIP-CACHE-INV: ; %bb.0: ; %entry 11598; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11599; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 11600; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11601; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 11602; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11603; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 11604; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 11605; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11606; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 11607; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 11608; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 11609; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 11610; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 11611; SKIP-CACHE-INV-NEXT: s_endpgm 11612; 11613; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: 11614; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11615; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11616; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11617; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11618; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11619; 
; NOTE(doc): @global_wavefront_one_as_seq_cst_atomicrmw issues one volatile
; `atomicrmw xchg` on a global (addrspace(1)) pointer with
; syncscope("wavefront-one-as") and seq_cst ordering; %val is never used.
; For every check prefix the expected code ends with a single swap atomic that
; carries no glc/sc0/th: modifier, followed immediately by s_endpgm.
; The scalar loads at the top presumably fetch the %out/%in kernel arguments
; (TODO confirm against the kernarg layout).
; These CHECK lines are autogenerated by utils/update_llc_test_checks.py:
; regenerate them with that script instead of editing them by hand.
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11620; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11621; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11622; 11623; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: 11624; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11625; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11626; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11627; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11628; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11629; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11630; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] 11631; GFX90A-TGSPLIT-NEXT: s_endpgm 11632; 11633; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: 11634; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11635; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11636; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11637; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11638; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11639; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11640; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 11641; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11642; 11643; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: 11644; GFX940-TGSPLIT: ; %bb.0: ; %entry 11645; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11646; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11647; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11648; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11649; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11650; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] 11651; GFX940-TGSPLIT-NEXT: s_endpgm 11652; 11653; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: 11654; GFX11-WGP: ; %bb.0: ; %entry 11655; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11656; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11657; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11658; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11659; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 11660; GFX11-WGP-NEXT: 
global_atomic_swap_b32 v0, v1, s[0:1] 11661; GFX11-WGP-NEXT: s_endpgm 11662; 11663; GFX11-CU-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: 11664; GFX11-CU: ; %bb.0: ; %entry 11665; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11666; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11667; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11668; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11669; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 11670; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11671; GFX11-CU-NEXT: s_endpgm 11672; 11673; GFX12-WGP-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: 11674; GFX12-WGP: ; %bb.0: ; %entry 11675; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11676; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11677; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11678; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11679; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 11680; GFX12-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11681; GFX12-WGP-NEXT: s_endpgm 11682; 11683; GFX12-CU-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: 11684; GFX12-CU: ; %bb.0: ; %entry 11685; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11686; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11687; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11688; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11689; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 11690; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] 11691; GFX12-CU-NEXT: s_endpgm 11692 ptr addrspace(1) %out, i32 %in) { 11693entry: 11694 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") seq_cst 11695 ret void 11696} 11697 11698define amdgpu_kernel void @global_wavefront_one_as_acquire_ret_atomicrmw( 11699; GFX6-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: 11700; GFX6: ; %bb.0: ; %entry 11701; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11702; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 11703; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11704; GFX6-NEXT: s_mov_b32 s11, s5 11705; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11706; GFX6-NEXT: s_mov_b32 s9, 
0x100f000 11707; GFX6-NEXT: s_mov_b32 s10, -1 11708; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11709; GFX6-NEXT: s_mov_b32 s5, s11 11710; GFX6-NEXT: s_mov_b32 s6, s10 11711; GFX6-NEXT: s_mov_b32 s7, s9 11712; GFX6-NEXT: v_mov_b32_e32 v0, s8 11713; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 11714; GFX6-NEXT: s_waitcnt vmcnt(0) 11715; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 11716; GFX6-NEXT: s_endpgm 11717; 11718; GFX7-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: 11719; GFX7: ; %bb.0: ; %entry 11720; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11721; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 11722; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11723; GFX7-NEXT: v_mov_b32_e32 v0, s4 11724; GFX7-NEXT: v_mov_b32_e32 v1, s5 11725; GFX7-NEXT: v_mov_b32_e32 v2, s6 11726; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 11727; GFX7-NEXT: v_mov_b32_e32 v0, s4 11728; GFX7-NEXT: v_mov_b32_e32 v1, s5 11729; GFX7-NEXT: s_waitcnt vmcnt(0) 11730; GFX7-NEXT: flat_store_dword v[0:1], v2 11731; GFX7-NEXT: s_endpgm 11732; 11733; GFX10-WGP-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: 11734; GFX10-WGP: ; %bb.0: ; %entry 11735; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11736; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11737; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 11738; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11739; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 11740; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 11741; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 11742; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 11743; GFX10-WGP-NEXT: s_endpgm 11744; 11745; GFX10-CU-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: 11746; GFX10-CU: ; %bb.0: ; %entry 11747; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11748; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11749; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 11750; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11751; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 11752; GFX10-CU-NEXT: global_atomic_swap v1, 
v0, v1, s[4:5] glc 11753; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 11754; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 11755; GFX10-CU-NEXT: s_endpgm 11756; 11757; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: 11758; SKIP-CACHE-INV: ; %bb.0: ; %entry 11759; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11760; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 11761; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11762; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 11763; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11764; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 11765; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 11766; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11767; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 11768; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 11769; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 11770; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 11771; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 11772; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 11773; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 11774; SKIP-CACHE-INV-NEXT: s_endpgm 11775; 11776; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: 11777; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11778; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11779; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11780; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11781; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11782; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11783; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 11784; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11785; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11786; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11787; 11788; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: 11789; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11790; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11791; GFX90A-TGSPLIT-NEXT: 
s_load_dwordx2 s[4:5], s[8:9], 0x0 11792; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11793; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11794; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11795; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 11796; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11797; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11798; GFX90A-TGSPLIT-NEXT: s_endpgm 11799; 11800; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: 11801; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11802; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11803; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11804; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11805; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11806; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11807; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 11808; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11809; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11810; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11811; 11812; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: 11813; GFX940-TGSPLIT: ; %bb.0: ; %entry 11814; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11815; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11816; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11817; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11818; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11819; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 11820; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11821; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11822; GFX940-TGSPLIT-NEXT: s_endpgm 11823; 11824; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: 11825; GFX11-WGP: ; %bb.0: ; %entry 11826; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 11827; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11828; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11829; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11830; GFX11-WGP-NEXT: 
v_mov_b32_e32 v1, s2 11831; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 11832; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 11833; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11834; GFX11-WGP-NEXT: s_endpgm 11835; 11836; GFX11-CU-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: 11837; GFX11-CU: ; %bb.0: ; %entry 11838; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 11839; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11840; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11841; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11842; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 11843; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 11844; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 11845; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11846; GFX11-CU-NEXT: s_endpgm 11847; 11848; GFX12-WGP-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: 11849; GFX12-WGP: ; %bb.0: ; %entry 11850; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 11851; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11852; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 11853; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11854; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 11855; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN 11856; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 11857; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 11858; GFX12-WGP-NEXT: s_endpgm 11859; 11860; GFX12-CU-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: 11861; GFX12-CU: ; %bb.0: ; %entry 11862; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 11863; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 11864; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 11865; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11866; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 11867; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN 11868; GFX12-CU-NEXT: s_wait_loadcnt 0x0 11869; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 11870; GFX12-CU-NEXT: s_endpgm 11871 ptr addrspace(1) %out, i32 %in) { 11872entry: 11873 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in 
syncscope("wavefront-one-as") acquire 11874 store i32 %val, ptr addrspace(1) %out, align 4 11875 ret void 11876} 11877 11878define amdgpu_kernel void @global_wavefront_one_as_acq_rel_ret_atomicrmw( 11879; GFX6-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: 11880; GFX6: ; %bb.0: ; %entry 11881; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11882; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 11883; GFX6-NEXT: s_waitcnt lgkmcnt(0) 11884; GFX6-NEXT: s_mov_b32 s11, s5 11885; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 11886; GFX6-NEXT: s_mov_b32 s9, 0x100f000 11887; GFX6-NEXT: s_mov_b32 s10, -1 11888; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 11889; GFX6-NEXT: s_mov_b32 s5, s11 11890; GFX6-NEXT: s_mov_b32 s6, s10 11891; GFX6-NEXT: s_mov_b32 s7, s9 11892; GFX6-NEXT: v_mov_b32_e32 v0, s8 11893; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 11894; GFX6-NEXT: s_waitcnt vmcnt(0) 11895; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 11896; GFX6-NEXT: s_endpgm 11897; 11898; GFX7-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: 11899; GFX7: ; %bb.0: ; %entry 11900; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11901; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 11902; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11903; GFX7-NEXT: v_mov_b32_e32 v0, s4 11904; GFX7-NEXT: v_mov_b32_e32 v1, s5 11905; GFX7-NEXT: v_mov_b32_e32 v2, s6 11906; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 11907; GFX7-NEXT: v_mov_b32_e32 v0, s4 11908; GFX7-NEXT: v_mov_b32_e32 v1, s5 11909; GFX7-NEXT: s_waitcnt vmcnt(0) 11910; GFX7-NEXT: flat_store_dword v[0:1], v2 11911; GFX7-NEXT: s_endpgm 11912; 11913; GFX10-WGP-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: 11914; GFX10-WGP: ; %bb.0: ; %entry 11915; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 11916; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11917; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 11918; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11919; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 11920; 
; NOTE(doc): @global_wavefront_one_as_acq_rel_ret_atomicrmw issues one volatile
; `atomicrmw xchg` on a global (addrspace(1)) pointer with
; syncscope("wavefront-one-as") and acq_rel ordering, then stores %val to %out.
; Every check prefix expects a returning swap (marked glc; sc0 on the GFX940
; prefixes; th:TH_ATOMIC_RETURN on the GFX12 prefixes), then an
; s_waitcnt vmcnt(0) (s_wait_loadcnt 0x0 on GFX12) before the store of the
; returned value, then s_endpgm.
; These CHECK lines are autogenerated by utils/update_llc_test_checks.py:
; regenerate them with that script instead of editing them by hand.
GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 11921; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 11922; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 11923; GFX10-WGP-NEXT: s_endpgm 11924; 11925; GFX10-CU-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: 11926; GFX10-CU: ; %bb.0: ; %entry 11927; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 11928; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11929; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 11930; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11931; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 11932; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 11933; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 11934; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 11935; GFX10-CU-NEXT: s_endpgm 11936; 11937; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: 11938; SKIP-CACHE-INV: ; %bb.0: ; %entry 11939; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11940; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 11941; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11942; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 11943; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 11944; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 11945; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 11946; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 11947; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 11948; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s6 11949; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 11950; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 11951; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 11952; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 11953; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 11954; SKIP-CACHE-INV-NEXT: s_endpgm 11955; 11956; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: 11957; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11958; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11959; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11960; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11961; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11962; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11963; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 11964; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11965; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11966; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11967; 11968; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: 11969; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11970; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11971; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 11972; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 11973; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11974; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 11975; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 11976; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11977; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 11978; GFX90A-TGSPLIT-NEXT: s_endpgm 11979; 11980; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: 11981; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11982; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11983; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11984; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11985; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11986; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11987; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 11988; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11989; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 11990; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11991; 11992; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: 11993; GFX940-TGSPLIT: ; %bb.0: ; %entry 11994; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 11995; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 11996; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 11997; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11998; 
GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 11999; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 12000; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12001; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12002; GFX940-TGSPLIT-NEXT: s_endpgm 12003; 12004; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: 12005; GFX11-WGP: ; %bb.0: ; %entry 12006; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12007; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12008; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12009; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12010; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 12011; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 12012; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12013; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12014; GFX11-WGP-NEXT: s_endpgm 12015; 12016; GFX11-CU-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: 12017; GFX11-CU: ; %bb.0: ; %entry 12018; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12019; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12020; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12021; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12022; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 12023; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 12024; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12025; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12026; GFX11-CU-NEXT: s_endpgm 12027; 12028; GFX12-WGP-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: 12029; GFX12-WGP: ; %bb.0: ; %entry 12030; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12031; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12032; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12033; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12034; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 12035; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN 12036; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12037; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12038; GFX12-WGP-NEXT: s_endpgm 12039; 12040; GFX12-CU-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: 12041; 
GFX12-CU: ; %bb.0: ; %entry 12042; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12043; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12044; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12045; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12046; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 12047; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN 12048; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12049; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12050; GFX12-CU-NEXT: s_endpgm 12051 ptr addrspace(1) %out, i32 %in) { 12052entry: 12053 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acq_rel 12054 store i32 %val, ptr addrspace(1) %out, align 4 12055 ret void 12056} 12057 12058define amdgpu_kernel void @global_wavefront_one_as_seq_cst_ret_atomicrmw( 12059; GFX6-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: 12060; GFX6: ; %bb.0: ; %entry 12061; GFX6-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12062; GFX6-NEXT: s_load_dword s8, s[8:9], 0x2 12063; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12064; GFX6-NEXT: s_mov_b32 s11, s5 12065; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12066; GFX6-NEXT: s_mov_b32 s9, 0x100f000 12067; GFX6-NEXT: s_mov_b32 s10, -1 12068; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12069; GFX6-NEXT: s_mov_b32 s5, s11 12070; GFX6-NEXT: s_mov_b32 s6, s10 12071; GFX6-NEXT: s_mov_b32 s7, s9 12072; GFX6-NEXT: v_mov_b32_e32 v0, s8 12073; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 12074; GFX6-NEXT: s_waitcnt vmcnt(0) 12075; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 12076; GFX6-NEXT: s_endpgm 12077; 12078; GFX7-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: 12079; GFX7: ; %bb.0: ; %entry 12080; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12081; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 12082; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12083; GFX7-NEXT: v_mov_b32_e32 v0, s4 12084; GFX7-NEXT: v_mov_b32_e32 v1, s5 12085; GFX7-NEXT: v_mov_b32_e32 v2, s6 12086; GFX7-NEXT: 
flat_atomic_swap v2, v[0:1], v2 glc 12087; GFX7-NEXT: v_mov_b32_e32 v0, s4 12088; GFX7-NEXT: v_mov_b32_e32 v1, s5 12089; GFX7-NEXT: s_waitcnt vmcnt(0) 12090; GFX7-NEXT: flat_store_dword v[0:1], v2 12091; GFX7-NEXT: s_endpgm 12092; 12093; GFX10-WGP-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: 12094; GFX10-WGP: ; %bb.0: ; %entry 12095; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12096; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12097; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 12098; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12099; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s6 12100; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12101; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12102; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 12103; GFX10-WGP-NEXT: s_endpgm 12104; 12105; GFX10-CU-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: 12106; GFX10-CU: ; %bb.0: ; %entry 12107; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12108; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12109; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 12110; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12111; GFX10-CU-NEXT: v_mov_b32_e32 v1, s6 12112; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12113; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12114; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 12115; GFX10-CU-NEXT: s_endpgm 12116; 12117; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: 12118; SKIP-CACHE-INV: ; %bb.0: ; %entry 12119; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12120; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[4:5], 0x2 12121; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12122; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s1 12123; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12124; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, 0xf000 12125; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 12126; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12127; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s7 12128; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s2, s6 12129; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s5 12130; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 12131; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc 12132; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12133; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 12134; SKIP-CACHE-INV-NEXT: s_endpgm 12135; 12136; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: 12137; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12138; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12139; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12140; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12141; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12142; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12143; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12144; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12145; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12146; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12147; 12148; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: 12149; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12150; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12151; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12152; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 12153; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12154; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12155; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc 12156; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12157; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 12158; GFX90A-TGSPLIT-NEXT: s_endpgm 12159; 12160; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: 12161; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12162; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12163; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12164; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12165; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12166; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12167; 
GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 12168; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12169; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12170; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12171; 12172; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: 12173; GFX940-TGSPLIT: ; %bb.0: ; %entry 12174; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12175; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12176; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 12177; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12178; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12179; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 12180; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12181; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 12182; GFX940-TGSPLIT-NEXT: s_endpgm 12183; 12184; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: 12185; GFX11-WGP: ; %bb.0: ; %entry 12186; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12187; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12188; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12189; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12190; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2 12191; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 12192; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12193; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12194; GFX11-WGP-NEXT: s_endpgm 12195; 12196; GFX11-CU-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: 12197; GFX11-CU: ; %bb.0: ; %entry 12198; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12199; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12200; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12201; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12202; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2 12203; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc 12204; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12205; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12206; GFX11-CU-NEXT: s_endpgm 12207; 12208; GFX12-WGP-LABEL: 
global_wavefront_one_as_seq_cst_ret_atomicrmw: 12209; GFX12-WGP: ; %bb.0: ; %entry 12210; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12211; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12212; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 12213; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12214; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2 12215; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN 12216; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12217; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 12218; GFX12-WGP-NEXT: s_endpgm 12219; 12220; GFX12-CU-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: 12221; GFX12-CU: ; %bb.0: ; %entry 12222; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12223; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12224; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 12225; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12226; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2 12227; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN 12228; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12229; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 12230; GFX12-CU-NEXT: s_endpgm 12231 ptr addrspace(1) %out, i32 %in) { 12232entry: 12233 %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") seq_cst 12234 store i32 %val, ptr addrspace(1) %out, align 4 12235 ret void 12236} 12237; Kernel under test - a volatile cmpxchg (expected value %old, replacement %in) on the i32 located four elements past global %out, with monotonic success and monotonic failure ordering at syncscope wavefront-one-as. The cmpxchg result %val is never used. 12238define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_cmpxchg( 12239; GFX6-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: 12240; GFX6: ; %bb.0: ; %entry 12241; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 12242; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 12243; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 12244; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 12245; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12246; GFX6-NEXT: s_mov_b32 s12, s5 12247; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12248; GFX6-NEXT: s_mov_b32 s10, 0x100f000 12249; GFX6-NEXT: s_mov_b32 s11, -1 12250; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
12251; GFX6-NEXT: s_mov_b32 s5, s12 12252; GFX6-NEXT: s_mov_b32 s6, s11 12253; GFX6-NEXT: s_mov_b32 s7, s10 12254; GFX6-NEXT: v_mov_b32_e32 v0, s9 12255; GFX6-NEXT: v_mov_b32_e32 v2, s8 12256; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 12257; GFX6-NEXT: v_mov_b32_e32 v1, v2 12258; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 12259; GFX6-NEXT: s_endpgm 12260; 12261; GFX7-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: 12262; GFX7: ; %bb.0: ; %entry 12263; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 12264; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 12265; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 12266; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 12267; GFX7-NEXT: s_mov_b64 s[10:11], 16 12268; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12269; GFX7-NEXT: s_mov_b32 s4, s8 12270; GFX7-NEXT: s_mov_b32 s5, s9 12271; GFX7-NEXT: s_mov_b32 s9, s10 12272; GFX7-NEXT: s_mov_b32 s8, s11 12273; GFX7-NEXT: s_add_u32 s4, s4, s9 12274; GFX7-NEXT: s_addc_u32 s8, s5, s8 12275; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 12276; GFX7-NEXT: s_mov_b32 s5, s8 12277; GFX7-NEXT: v_mov_b32_e32 v2, s7 12278; GFX7-NEXT: v_mov_b32_e32 v0, s6 12279; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12280; GFX7-NEXT: v_mov_b32_e32 v3, v0 12281; GFX7-NEXT: v_mov_b32_e32 v0, s4 12282; GFX7-NEXT: v_mov_b32_e32 v1, s5 12283; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 12284; GFX7-NEXT: s_endpgm 12285; 12286; GFX10-WGP-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: 12287; GFX10-WGP: ; %bb.0: ; %entry 12288; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12289; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12290; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 12291; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 12292; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12293; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 12294; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 12295; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 
killed $exec 12296; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 12297; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 12298; GFX10-WGP-NEXT: s_endpgm 12299; 12300; GFX10-CU-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: 12301; GFX10-CU: ; %bb.0: ; %entry 12302; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12303; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12304; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 12305; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 12306; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12307; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 12308; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 12309; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12310; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 12311; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 12312; GFX10-CU-NEXT: s_endpgm 12313; 12314; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: 12315; SKIP-CACHE-INV: ; %bb.0: ; %entry 12316; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 12317; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 12318; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 12319; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 12320; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12321; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 12322; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12323; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 12324; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 12325; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12326; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 12327; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 12328; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 12329; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 12330; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 12331; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 12332; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 12333; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, 
s[0:3], 0 offset:16 12334; SKIP-CACHE-INV-NEXT: s_endpgm 12335; 12336; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: 12337; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12338; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12339; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12340; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 12341; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 12342; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12343; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 12344; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12345; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12346; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 12347; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 12348; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12349; 12350; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: 12351; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12352; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12353; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12354; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 12355; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 12356; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12357; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 12358; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12359; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12360; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 12361; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 12362; GFX90A-TGSPLIT-NEXT: s_endpgm 12363; 12364; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: 12365; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12366; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12367; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12368; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 12369; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 12370; 
GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12371; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 12372; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12373; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12374; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 12375; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 12376; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12377; 12378; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: 12379; GFX940-TGSPLIT: ; %bb.0: ; %entry 12380; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12381; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12382; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 12383; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 12384; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12385; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 12386; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12387; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12388; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 12389; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 12390; GFX940-TGSPLIT-NEXT: s_endpgm 12391; 12392; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: 12393; GFX11-WGP: ; %bb.0: ; %entry 12394; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12395; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12396; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 12397; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 12398; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12399; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 12400; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 12401; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12402; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 12403; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 12404; GFX11-WGP-NEXT: s_endpgm 12405; 12406; GFX11-CU-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: 12407; GFX11-CU: ; %bb.0: ; %entry 12408; 
GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12409; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12410; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 12411; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 12412; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12413; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 12414; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 12415; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12416; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 12417; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 12418; GFX11-CU-NEXT: s_endpgm 12419; 12420; GFX12-WGP-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: 12421; GFX12-WGP: ; %bb.0: ; %entry 12422; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12423; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12424; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 12425; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 12426; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12427; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 12428; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 12429; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12430; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 12431; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 12432; GFX12-WGP-NEXT: s_endpgm 12433; 12434; GFX12-CU-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: 12435; GFX12-CU: ; %bb.0: ; %entry 12436; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12437; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12438; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 12439; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 12440; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12441; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 12442; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 12443; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12444; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 12445; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 12446; GFX12-CU-NEXT: s_endpgm 12447 ptr addrspace(1) %out, i32 %in, i32 %old) { 12448entry: 12449 %gep = getelementptr 
i32, ptr addrspace(1) %out, i32 4 12450 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic 12451 ret void 12452} 12453; Kernel under test - a volatile cmpxchg (expected value %old, replacement %in) on the i32 located four elements past global %out, with acquire success and monotonic failure ordering at syncscope wavefront-one-as. The cmpxchg result %val is never used. 12454define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_cmpxchg( 12455; GFX6-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: 12456; GFX6: ; %bb.0: ; %entry 12457; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 12458; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 12459; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 12460; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 12461; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12462; GFX6-NEXT: s_mov_b32 s12, s5 12463; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12464; GFX6-NEXT: s_mov_b32 s10, 0x100f000 12465; GFX6-NEXT: s_mov_b32 s11, -1 12466; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12467; GFX6-NEXT: s_mov_b32 s5, s12 12468; GFX6-NEXT: s_mov_b32 s6, s11 12469; GFX6-NEXT: s_mov_b32 s7, s10 12470; GFX6-NEXT: v_mov_b32_e32 v0, s9 12471; GFX6-NEXT: v_mov_b32_e32 v2, s8 12472; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 12473; GFX6-NEXT: v_mov_b32_e32 v1, v2 12474; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 12475; GFX6-NEXT: s_endpgm 12476; 12477; GFX7-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: 12478; GFX7: ; %bb.0: ; %entry 12479; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 12480; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 12481; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 12482; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 12483; GFX7-NEXT: s_mov_b64 s[10:11], 16 12484; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12485; GFX7-NEXT: s_mov_b32 s4, s8 12486; GFX7-NEXT: s_mov_b32 s5, s9 12487; GFX7-NEXT: s_mov_b32 s9, s10 12488; GFX7-NEXT: s_mov_b32 s8, s11 12489; GFX7-NEXT: s_add_u32 s4, s4, s9 12490; GFX7-NEXT: s_addc_u32 s8, s5, s8 12491; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 12492; GFX7-NEXT: s_mov_b32 s5, s8 12493; GFX7-NEXT:
v_mov_b32_e32 v2, s7 12494; GFX7-NEXT: v_mov_b32_e32 v0, s6 12495; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12496; GFX7-NEXT: v_mov_b32_e32 v3, v0 12497; GFX7-NEXT: v_mov_b32_e32 v0, s4 12498; GFX7-NEXT: v_mov_b32_e32 v1, s5 12499; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 12500; GFX7-NEXT: s_endpgm 12501; 12502; GFX10-WGP-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: 12503; GFX10-WGP: ; %bb.0: ; %entry 12504; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12505; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12506; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 12507; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 12508; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12509; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 12510; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 12511; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12512; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 12513; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 12514; GFX10-WGP-NEXT: s_endpgm 12515; 12516; GFX10-CU-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: 12517; GFX10-CU: ; %bb.0: ; %entry 12518; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12519; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12520; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 12521; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 12522; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12523; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 12524; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 12525; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12526; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 12527; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 12528; GFX10-CU-NEXT: s_endpgm 12529; 12530; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: 12531; SKIP-CACHE-INV: ; %bb.0: ; %entry 12532; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 12533; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 12534; SKIP-CACHE-INV-NEXT: s_load_dword s5, 
s[2:3], 0x2 12535; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 12536; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12537; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 12538; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12539; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 12540; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 12541; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12542; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 12543; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 12544; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 12545; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 12546; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 12547; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 12548; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 12549; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 12550; SKIP-CACHE-INV-NEXT: s_endpgm 12551; 12552; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: 12553; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12554; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12555; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12556; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 12557; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 12558; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12559; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 12560; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12561; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12562; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 12563; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 12564; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12565; 12566; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: 12567; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12568; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12569; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12570; GFX90A-TGSPLIT-NEXT: s_load_dword s7, 
s[8:9], 0x8 12571; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 12572; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12573; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 12574; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12575; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12576; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 12577; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 12578; GFX90A-TGSPLIT-NEXT: s_endpgm 12579; 12580; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: 12581; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12582; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12583; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12584; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 12585; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 12586; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12587; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 12588; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12589; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12590; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 12591; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 12592; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12593; 12594; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: 12595; GFX940-TGSPLIT: ; %bb.0: ; %entry 12596; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12597; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12598; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 12599; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 12600; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12601; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 12602; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12603; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12604; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 12605; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 12606; 
GFX940-TGSPLIT-NEXT: s_endpgm 12607; 12608; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: 12609; GFX11-WGP: ; %bb.0: ; %entry 12610; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12611; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12612; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 12613; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 12614; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12615; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 12616; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 12617; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12618; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 12619; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 12620; GFX11-WGP-NEXT: s_endpgm 12621; 12622; GFX11-CU-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: 12623; GFX11-CU: ; %bb.0: ; %entry 12624; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12625; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12626; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 12627; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 12628; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12629; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 12630; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 12631; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12632; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 12633; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 12634; GFX11-CU-NEXT: s_endpgm 12635; 12636; GFX12-WGP-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: 12637; GFX12-WGP: ; %bb.0: ; %entry 12638; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12639; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12640; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 12641; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 12642; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12643; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 12644; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 12645; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12646; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 12647; GFX12-WGP-NEXT: 
global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 12648; GFX12-WGP-NEXT: s_endpgm 12649; 12650; GFX12-CU-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: 12651; GFX12-CU: ; %bb.0: ; %entry 12652; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12653; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12654; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 12655; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 12656; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12657; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 12658; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 12659; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12660; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 12661; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 12662; GFX12-CU-NEXT: s_endpgm 12663 ptr addrspace(1) %out, i32 %in, i32 %old) { 12664entry: 12665 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 12666 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic 12667 ret void 12668} 12669; Kernel under test - a volatile cmpxchg (expected value %old, replacement %in) on the i32 located four elements past global %out, with release success and monotonic failure ordering at syncscope wavefront-one-as. The cmpxchg result %val is never used. 12670define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_cmpxchg( 12671; GFX6-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: 12672; GFX6: ; %bb.0: ; %entry 12673; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 12674; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 12675; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 12676; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 12677; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12678; GFX6-NEXT: s_mov_b32 s12, s5 12679; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12680; GFX6-NEXT: s_mov_b32 s10, 0x100f000 12681; GFX6-NEXT: s_mov_b32 s11, -1 12682; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12683; GFX6-NEXT: s_mov_b32 s5, s12 12684; GFX6-NEXT: s_mov_b32 s6, s11 12685; GFX6-NEXT: s_mov_b32 s7, s10 12686; GFX6-NEXT: v_mov_b32_e32 v0, s9 12687; GFX6-NEXT: v_mov_b32_e32 v2, s8 12688; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 12689; GFX6-NEXT:
v_mov_b32_e32 v1, v2 12690; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 12691; GFX6-NEXT: s_endpgm 12692; 12693; GFX7-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: 12694; GFX7: ; %bb.0: ; %entry 12695; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 12696; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 12697; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 12698; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 12699; GFX7-NEXT: s_mov_b64 s[10:11], 16 12700; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12701; GFX7-NEXT: s_mov_b32 s4, s8 12702; GFX7-NEXT: s_mov_b32 s5, s9 12703; GFX7-NEXT: s_mov_b32 s9, s10 12704; GFX7-NEXT: s_mov_b32 s8, s11 12705; GFX7-NEXT: s_add_u32 s4, s4, s9 12706; GFX7-NEXT: s_addc_u32 s8, s5, s8 12707; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 12708; GFX7-NEXT: s_mov_b32 s5, s8 12709; GFX7-NEXT: v_mov_b32_e32 v2, s7 12710; GFX7-NEXT: v_mov_b32_e32 v0, s6 12711; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12712; GFX7-NEXT: v_mov_b32_e32 v3, v0 12713; GFX7-NEXT: v_mov_b32_e32 v0, s4 12714; GFX7-NEXT: v_mov_b32_e32 v1, s5 12715; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 12716; GFX7-NEXT: s_endpgm 12717; 12718; GFX10-WGP-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: 12719; GFX10-WGP: ; %bb.0: ; %entry 12720; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12721; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12722; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 12723; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 12724; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12725; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 12726; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 12727; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12728; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 12729; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 12730; GFX10-WGP-NEXT: s_endpgm 12731; 12732; GFX10-CU-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: 12733; GFX10-CU: ; %bb.0: ; %entry 12734; 
GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12735; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12736; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 12737; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 12738; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12739; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 12740; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 12741; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12742; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 12743; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 12744; GFX10-CU-NEXT: s_endpgm 12745; 12746; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: 12747; SKIP-CACHE-INV: ; %bb.0: ; %entry 12748; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 12749; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 12750; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 12751; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 12752; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12753; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 12754; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12755; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 12756; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 12757; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12758; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 12759; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 12760; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 12761; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 12762; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 12763; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 12764; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 12765; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 12766; SKIP-CACHE-INV-NEXT: s_endpgm 12767; 12768; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: 12769; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12770; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12771; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 
s[4:5], s[8:9], 0x0 12772; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 12773; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 12774; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12775; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 12776; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12777; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12778; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 12779; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 12780; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12781; 12782; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: 12783; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12784; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12785; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12786; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 12787; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 12788; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12789; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 12790; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12791; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12792; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 12793; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 12794; GFX90A-TGSPLIT-NEXT: s_endpgm 12795; 12796; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: 12797; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12798; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12799; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12800; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 12801; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 12802; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12803; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 12804; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12805; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12806; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 12807; 
GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 12808; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12809; 12810; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: 12811; GFX940-TGSPLIT: ; %bb.0: ; %entry 12812; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12813; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 12814; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 12815; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 12816; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12817; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 12818; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 12819; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12820; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 12821; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 12822; GFX940-TGSPLIT-NEXT: s_endpgm 12823; 12824; GFX11-WGP-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: 12825; GFX11-WGP: ; %bb.0: ; %entry 12826; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 12827; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12828; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 12829; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 12830; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12831; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 12832; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 12833; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12834; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 12835; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 12836; GFX11-WGP-NEXT: s_endpgm 12837; 12838; GFX11-CU-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: 12839; GFX11-CU: ; %bb.0: ; %entry 12840; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 12841; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12842; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 12843; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 12844; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12845; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 12846; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 
12847; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12848; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 12849; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 12850; GFX11-CU-NEXT: s_endpgm 12851; 12852; GFX12-WGP-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: 12853; GFX12-WGP: ; %bb.0: ; %entry 12854; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 12855; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12856; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 12857; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 12858; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12859; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 12860; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 12861; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12862; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 12863; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 12864; GFX12-WGP-NEXT: s_endpgm 12865; 12866; GFX12-CU-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: 12867; GFX12-CU: ; %bb.0: ; %entry 12868; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 12869; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 12870; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 12871; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 12872; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12873; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 12874; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 12875; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12876; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 12877; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 12878; GFX12-CU-NEXT: s_endpgm 12879 ptr addrspace(1) %out, i32 %in, i32 %old) { 12880entry: 12881 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 12882 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic 12883 ret void 12884} 12885 12886define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_cmpxchg( 12887; GFX6-LABEL: 
global_wavefront_one_as_acq_rel_monotonic_cmpxchg: 12888; GFX6: ; %bb.0: ; %entry 12889; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 12890; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 12891; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 12892; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 12893; GFX6-NEXT: s_waitcnt lgkmcnt(0) 12894; GFX6-NEXT: s_mov_b32 s12, s5 12895; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 12896; GFX6-NEXT: s_mov_b32 s10, 0x100f000 12897; GFX6-NEXT: s_mov_b32 s11, -1 12898; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 12899; GFX6-NEXT: s_mov_b32 s5, s12 12900; GFX6-NEXT: s_mov_b32 s6, s11 12901; GFX6-NEXT: s_mov_b32 s7, s10 12902; GFX6-NEXT: v_mov_b32_e32 v0, s9 12903; GFX6-NEXT: v_mov_b32_e32 v2, s8 12904; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 12905; GFX6-NEXT: v_mov_b32_e32 v1, v2 12906; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 12907; GFX6-NEXT: s_endpgm 12908; 12909; GFX7-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: 12910; GFX7: ; %bb.0: ; %entry 12911; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 12912; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 12913; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 12914; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 12915; GFX7-NEXT: s_mov_b64 s[10:11], 16 12916; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12917; GFX7-NEXT: s_mov_b32 s4, s8 12918; GFX7-NEXT: s_mov_b32 s5, s9 12919; GFX7-NEXT: s_mov_b32 s9, s10 12920; GFX7-NEXT: s_mov_b32 s8, s11 12921; GFX7-NEXT: s_add_u32 s4, s4, s9 12922; GFX7-NEXT: s_addc_u32 s8, s5, s8 12923; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 12924; GFX7-NEXT: s_mov_b32 s5, s8 12925; GFX7-NEXT: v_mov_b32_e32 v2, s7 12926; GFX7-NEXT: v_mov_b32_e32 v0, s6 12927; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12928; GFX7-NEXT: v_mov_b32_e32 v3, v0 12929; GFX7-NEXT: v_mov_b32_e32 v0, s4 12930; GFX7-NEXT: v_mov_b32_e32 v1, s5 12931; GFX7-NEXT: flat_atomic_cmpswap 
v[0:1], v[2:3] 12932; GFX7-NEXT: s_endpgm 12933; 12934; GFX10-WGP-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: 12935; GFX10-WGP: ; %bb.0: ; %entry 12936; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 12937; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12938; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 12939; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 12940; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12941; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 12942; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 12943; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12944; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 12945; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 12946; GFX10-WGP-NEXT: s_endpgm 12947; 12948; GFX10-CU-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: 12949; GFX10-CU: ; %bb.0: ; %entry 12950; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 12951; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12952; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 12953; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 12954; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12955; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 12956; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 12957; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 12958; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 12959; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 12960; GFX10-CU-NEXT: s_endpgm 12961; 12962; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: 12963; SKIP-CACHE-INV: ; %bb.0: ; %entry 12964; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 12965; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 12966; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 12967; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 12968; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12969; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 12970; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 12971; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 12972; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 12973; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 12974; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 12975; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 12976; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 12977; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 12978; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 12979; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 12980; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 12981; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 12982; SKIP-CACHE-INV-NEXT: s_endpgm 12983; 12984; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: 12985; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12986; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 12987; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 12988; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 12989; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 12990; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12991; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 12992; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 12993; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 12994; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 12995; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 12996; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12997; 12998; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: 12999; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13000; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13001; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13002; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13003; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13004; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13005; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13006; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13007; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 
killed $exec 13008; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13009; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13010; GFX90A-TGSPLIT-NEXT: s_endpgm 13011; 13012; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: 13013; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13014; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13015; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13016; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13017; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13018; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13019; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13020; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13021; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13022; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13023; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 13024; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13025; 13026; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: 13027; GFX940-TGSPLIT: ; %bb.0: ; %entry 13028; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13029; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13030; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13031; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13032; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13033; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13034; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13035; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13036; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13037; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 13038; GFX940-TGSPLIT-NEXT: s_endpgm 13039; 13040; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: 13041; GFX11-WGP: ; %bb.0: ; %entry 13042; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13043; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13044; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 
13045; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13046; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13047; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 13048; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 13049; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13050; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 13051; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13052; GFX11-WGP-NEXT: s_endpgm 13053; 13054; GFX11-CU-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: 13055; GFX11-CU: ; %bb.0: ; %entry 13056; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13057; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13058; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13059; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13060; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13061; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 13062; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 13063; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13064; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 13065; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13066; GFX11-CU-NEXT: s_endpgm 13067; 13068; GFX12-WGP-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: 13069; GFX12-WGP: ; %bb.0: ; %entry 13070; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13071; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13072; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13073; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13074; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13075; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 13076; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 13077; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13078; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 13079; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13080; GFX12-WGP-NEXT: s_endpgm 13081; 13082; GFX12-CU-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: 13083; GFX12-CU: ; %bb.0: ; %entry 13084; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13085; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13086; 
GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13087; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13088; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13089; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 13090; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 13091; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13092; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 13093; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13094; GFX12-CU-NEXT: s_endpgm 13095 ptr addrspace(1) %out, i32 %in, i32 %old) { 13096entry: 13097 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 13098 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic 13099 ret void 13100} 13101 13102define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_cmpxchg( 13103; GFX6-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: 13104; GFX6: ; %bb.0: ; %entry 13105; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 13106; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 13107; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 13108; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 13109; GFX6-NEXT: s_waitcnt lgkmcnt(0) 13110; GFX6-NEXT: s_mov_b32 s12, s5 13111; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13112; GFX6-NEXT: s_mov_b32 s10, 0x100f000 13113; GFX6-NEXT: s_mov_b32 s11, -1 13114; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13115; GFX6-NEXT: s_mov_b32 s5, s12 13116; GFX6-NEXT: s_mov_b32 s6, s11 13117; GFX6-NEXT: s_mov_b32 s7, s10 13118; GFX6-NEXT: v_mov_b32_e32 v0, s9 13119; GFX6-NEXT: v_mov_b32_e32 v2, s8 13120; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13121; GFX6-NEXT: v_mov_b32_e32 v1, v2 13122; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 13123; GFX6-NEXT: s_endpgm 13124; 13125; GFX7-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: 13126; GFX7: ; %bb.0: ; %entry 13127; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 13128; GFX7-NEXT: s_load_dwordx2 s[8:9], 
s[4:5], 0x0 13129; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 13130; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 13131; GFX7-NEXT: s_mov_b64 s[10:11], 16 13132; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13133; GFX7-NEXT: s_mov_b32 s4, s8 13134; GFX7-NEXT: s_mov_b32 s5, s9 13135; GFX7-NEXT: s_mov_b32 s9, s10 13136; GFX7-NEXT: s_mov_b32 s8, s11 13137; GFX7-NEXT: s_add_u32 s4, s4, s9 13138; GFX7-NEXT: s_addc_u32 s8, s5, s8 13139; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 13140; GFX7-NEXT: s_mov_b32 s5, s8 13141; GFX7-NEXT: v_mov_b32_e32 v2, s7 13142; GFX7-NEXT: v_mov_b32_e32 v0, s6 13143; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13144; GFX7-NEXT: v_mov_b32_e32 v3, v0 13145; GFX7-NEXT: v_mov_b32_e32 v0, s4 13146; GFX7-NEXT: v_mov_b32_e32 v1, s5 13147; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 13148; GFX7-NEXT: s_endpgm 13149; 13150; GFX10-WGP-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: 13151; GFX10-WGP: ; %bb.0: ; %entry 13152; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 13153; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13154; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 13155; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 13156; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13157; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 13158; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 13159; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13160; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 13161; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13162; GFX10-WGP-NEXT: s_endpgm 13163; 13164; GFX10-CU-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: 13165; GFX10-CU: ; %bb.0: ; %entry 13166; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 13167; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13168; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 13169; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 13170; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13171; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 13172; GFX10-CU-NEXT: v_mov_b32_e32 v3, 
s6 13173; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13174; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 13175; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13176; GFX10-CU-NEXT: s_endpgm 13177; 13178; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: 13179; SKIP-CACHE-INV: ; %bb.0: ; %entry 13180; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 13181; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 13182; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 13183; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 13184; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13185; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 13186; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 13187; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 13188; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 13189; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 13190; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 13191; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 13192; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 13193; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 13194; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 13195; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13196; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 13197; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 13198; SKIP-CACHE-INV-NEXT: s_endpgm 13199; 13200; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: 13201; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13202; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13203; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13204; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13205; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13206; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13207; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13208; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13209; 
GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13210; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13211; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13212; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13213; 13214; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: 13215; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13216; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13217; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13218; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13219; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13220; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13221; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13222; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13223; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13224; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13225; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13226; GFX90A-TGSPLIT-NEXT: s_endpgm 13227; 13228; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: 13229; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13230; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13231; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13232; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13233; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13234; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13235; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13236; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13237; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13238; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13239; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 13240; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13241; 13242; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: 13243; GFX940-TGSPLIT: ; %bb.0: ; %entry 13244; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13245; 
GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13246; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13247; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13248; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13249; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13250; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13251; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13252; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13253; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 13254; GFX940-TGSPLIT-NEXT: s_endpgm 13255; 13256; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: 13257; GFX11-WGP: ; %bb.0: ; %entry 13258; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13259; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13260; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13261; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13262; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13263; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 13264; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 13265; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13266; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 13267; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13268; GFX11-WGP-NEXT: s_endpgm 13269; 13270; GFX11-CU-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: 13271; GFX11-CU: ; %bb.0: ; %entry 13272; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13273; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13274; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13275; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13276; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13277; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 13278; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 13279; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13280; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 13281; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13282; GFX11-CU-NEXT: s_endpgm 13283; 13284; GFX12-WGP-LABEL: 
global_wavefront_one_as_seq_cst_monotonic_cmpxchg: 13285; GFX12-WGP: ; %bb.0: ; %entry 13286; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13287; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13288; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13289; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13290; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13291; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 13292; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 13293; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13294; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 13295; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13296; GFX12-WGP-NEXT: s_endpgm 13297; 13298; GFX12-CU-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: 13299; GFX12-CU: ; %bb.0: ; %entry 13300; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13301; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13302; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13303; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13304; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13305; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 13306; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 13307; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13308; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 13309; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13310; GFX12-CU-NEXT: s_endpgm 13311 ptr addrspace(1) %out, i32 %in, i32 %old) { 13312entry: 13313 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 13314 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic 13315 ret void 13316} 13317 13318define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_cmpxchg( 13319; GFX6-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: 13320; GFX6: ; %bb.0: ; %entry 13321; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 13322; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 13323; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 13324; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 13325; GFX6-NEXT: s_waitcnt 
lgkmcnt(0) 13326; GFX6-NEXT: s_mov_b32 s12, s5 13327; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13328; GFX6-NEXT: s_mov_b32 s10, 0x100f000 13329; GFX6-NEXT: s_mov_b32 s11, -1 13330; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13331; GFX6-NEXT: s_mov_b32 s5, s12 13332; GFX6-NEXT: s_mov_b32 s6, s11 13333; GFX6-NEXT: s_mov_b32 s7, s10 13334; GFX6-NEXT: v_mov_b32_e32 v0, s9 13335; GFX6-NEXT: v_mov_b32_e32 v2, s8 13336; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13337; GFX6-NEXT: v_mov_b32_e32 v1, v2 13338; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 13339; GFX6-NEXT: s_endpgm 13340; 13341; GFX7-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: 13342; GFX7: ; %bb.0: ; %entry 13343; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 13344; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 13345; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 13346; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 13347; GFX7-NEXT: s_mov_b64 s[10:11], 16 13348; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13349; GFX7-NEXT: s_mov_b32 s4, s8 13350; GFX7-NEXT: s_mov_b32 s5, s9 13351; GFX7-NEXT: s_mov_b32 s9, s10 13352; GFX7-NEXT: s_mov_b32 s8, s11 13353; GFX7-NEXT: s_add_u32 s4, s4, s9 13354; GFX7-NEXT: s_addc_u32 s8, s5, s8 13355; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 13356; GFX7-NEXT: s_mov_b32 s5, s8 13357; GFX7-NEXT: v_mov_b32_e32 v2, s7 13358; GFX7-NEXT: v_mov_b32_e32 v0, s6 13359; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13360; GFX7-NEXT: v_mov_b32_e32 v3, v0 13361; GFX7-NEXT: v_mov_b32_e32 v0, s4 13362; GFX7-NEXT: v_mov_b32_e32 v1, s5 13363; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 13364; GFX7-NEXT: s_endpgm 13365; 13366; GFX10-WGP-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: 13367; GFX10-WGP: ; %bb.0: ; %entry 13368; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 13369; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13370; GFX10-WGP-NEXT: s_load_dword s7, 
s[8:9], 0x8 13371; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 13372; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13373; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 13374; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 13375; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13376; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 13377; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13378; GFX10-WGP-NEXT: s_endpgm 13379; 13380; GFX10-CU-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: 13381; GFX10-CU: ; %bb.0: ; %entry 13382; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 13383; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13384; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 13385; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 13386; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13387; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 13388; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 13389; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13390; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 13391; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13392; GFX10-CU-NEXT: s_endpgm 13393; 13394; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: 13395; SKIP-CACHE-INV: ; %bb.0: ; %entry 13396; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 13397; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 13398; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 13399; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 13400; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13401; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 13402; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 13403; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 13404; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 13405; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 13406; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 13407; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 13408; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 13409; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s5 13410; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 13411; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13412; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 13413; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 13414; SKIP-CACHE-INV-NEXT: s_endpgm 13415; 13416; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: 13417; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13418; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13419; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13420; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13421; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13422; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13423; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13424; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13425; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13426; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13427; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13428; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13429; 13430; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: 13431; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13432; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13433; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13434; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13435; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13436; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13437; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13438; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13439; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13440; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13441; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13442; GFX90A-TGSPLIT-NEXT: s_endpgm 13443; 13444; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: 13445; GFX940-NOTTGSPLIT: ; 
%bb.0: ; %entry 13446; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13447; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13448; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13449; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13450; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13451; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13452; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13453; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13454; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13455; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 13456; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13457; 13458; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: 13459; GFX940-TGSPLIT: ; %bb.0: ; %entry 13460; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13461; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13462; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13463; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13464; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13465; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13466; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13467; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13468; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13469; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 13470; GFX940-TGSPLIT-NEXT: s_endpgm 13471; 13472; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: 13473; GFX11-WGP: ; %bb.0: ; %entry 13474; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13475; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13476; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13477; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13478; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13479; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 13480; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 13481; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13482; GFX11-WGP-NEXT: 
v_mov_b32_e32 v2, v3 13483; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13484; GFX11-WGP-NEXT: s_endpgm 13485; 13486; GFX11-CU-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: 13487; GFX11-CU: ; %bb.0: ; %entry 13488; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13489; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13490; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13491; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13492; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13493; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 13494; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 13495; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13496; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 13497; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13498; GFX11-CU-NEXT: s_endpgm 13499; 13500; GFX12-WGP-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: 13501; GFX12-WGP: ; %bb.0: ; %entry 13502; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13503; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13504; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13505; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13506; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13507; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 13508; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 13509; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13510; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 13511; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13512; GFX12-WGP-NEXT: s_endpgm 13513; 13514; GFX12-CU-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: 13515; GFX12-CU: ; %bb.0: ; %entry 13516; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13517; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13518; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13519; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13520; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13521; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 13522; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 13523; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 
killed $exec 13524; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 13525; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13526; GFX12-CU-NEXT: s_endpgm 13527 ptr addrspace(1) %out, i32 %in, i32 %old) { 13528entry: 13529 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 13530 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire 13531 ret void 13532} 13533 13534define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_cmpxchg( 13535; GFX6-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: 13536; GFX6: ; %bb.0: ; %entry 13537; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 13538; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 13539; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 13540; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 13541; GFX6-NEXT: s_waitcnt lgkmcnt(0) 13542; GFX6-NEXT: s_mov_b32 s12, s5 13543; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13544; GFX6-NEXT: s_mov_b32 s10, 0x100f000 13545; GFX6-NEXT: s_mov_b32 s11, -1 13546; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13547; GFX6-NEXT: s_mov_b32 s5, s12 13548; GFX6-NEXT: s_mov_b32 s6, s11 13549; GFX6-NEXT: s_mov_b32 s7, s10 13550; GFX6-NEXT: v_mov_b32_e32 v0, s9 13551; GFX6-NEXT: v_mov_b32_e32 v2, s8 13552; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13553; GFX6-NEXT: v_mov_b32_e32 v1, v2 13554; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 13555; GFX6-NEXT: s_endpgm 13556; 13557; GFX7-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: 13558; GFX7: ; %bb.0: ; %entry 13559; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 13560; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 13561; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 13562; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 13563; GFX7-NEXT: s_mov_b64 s[10:11], 16 13564; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13565; GFX7-NEXT: s_mov_b32 s4, s8 13566; GFX7-NEXT: s_mov_b32 s5, s9 13567; GFX7-NEXT: s_mov_b32 s9, s10 
13568; GFX7-NEXT: s_mov_b32 s8, s11 13569; GFX7-NEXT: s_add_u32 s4, s4, s9 13570; GFX7-NEXT: s_addc_u32 s8, s5, s8 13571; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 13572; GFX7-NEXT: s_mov_b32 s5, s8 13573; GFX7-NEXT: v_mov_b32_e32 v2, s7 13574; GFX7-NEXT: v_mov_b32_e32 v0, s6 13575; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13576; GFX7-NEXT: v_mov_b32_e32 v3, v0 13577; GFX7-NEXT: v_mov_b32_e32 v0, s4 13578; GFX7-NEXT: v_mov_b32_e32 v1, s5 13579; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 13580; GFX7-NEXT: s_endpgm 13581; 13582; GFX10-WGP-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: 13583; GFX10-WGP: ; %bb.0: ; %entry 13584; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 13585; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13586; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 13587; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 13588; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13589; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 13590; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 13591; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13592; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 13593; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13594; GFX10-WGP-NEXT: s_endpgm 13595; 13596; GFX10-CU-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: 13597; GFX10-CU: ; %bb.0: ; %entry 13598; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 13599; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13600; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 13601; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 13602; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13603; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 13604; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 13605; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13606; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 13607; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13608; GFX10-CU-NEXT: s_endpgm 13609; 13610; SKIP-CACHE-INV-LABEL: 
global_wavefront_one_as_acquire_acquire_cmpxchg: 13611; SKIP-CACHE-INV: ; %bb.0: ; %entry 13612; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 13613; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 13614; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 13615; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 13616; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13617; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 13618; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 13619; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 13620; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 13621; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 13622; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 13623; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 13624; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 13625; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 13626; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 13627; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13628; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 13629; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 13630; SKIP-CACHE-INV-NEXT: s_endpgm 13631; 13632; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: 13633; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13634; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13635; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13636; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13637; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13638; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13639; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13640; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13641; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13642; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13643; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13644; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13645; 13646; 
GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: 13647; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13648; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13649; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13650; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13651; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13652; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13653; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13654; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13655; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13656; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13657; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13658; GFX90A-TGSPLIT-NEXT: s_endpgm 13659; 13660; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: 13661; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13662; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13663; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13664; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13665; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13666; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13667; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13668; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13669; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13670; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13671; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 13672; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13673; 13674; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: 13675; GFX940-TGSPLIT: ; %bb.0: ; %entry 13676; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13677; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13678; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13679; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13680; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13681; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13682; 
GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13683; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13684; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13685; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 13686; GFX940-TGSPLIT-NEXT: s_endpgm 13687; 13688; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: 13689; GFX11-WGP: ; %bb.0: ; %entry 13690; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13691; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13692; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13693; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13694; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13695; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 13696; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 13697; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13698; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 13699; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13700; GFX11-WGP-NEXT: s_endpgm 13701; 13702; GFX11-CU-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: 13703; GFX11-CU: ; %bb.0: ; %entry 13704; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13705; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13706; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13707; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13708; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13709; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 13710; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 13711; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13712; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 13713; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13714; GFX11-CU-NEXT: s_endpgm 13715; 13716; GFX12-WGP-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: 13717; GFX12-WGP: ; %bb.0: ; %entry 13718; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13719; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13720; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13721; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13722; GFX12-WGP-NEXT: 
s_wait_kmcnt 0x0 13723; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 13724; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 13725; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13726; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 13727; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13728; GFX12-WGP-NEXT: s_endpgm 13729; 13730; GFX12-CU-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: 13731; GFX12-CU: ; %bb.0: ; %entry 13732; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13733; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13734; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13735; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13736; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13737; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 13738; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 13739; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13740; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 13741; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13742; GFX12-CU-NEXT: s_endpgm 13743 ptr addrspace(1) %out, i32 %in, i32 %old) { 13744entry: 13745 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 13746 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire 13747 ret void 13748} 13749 13750define amdgpu_kernel void @global_wavefront_one_as_release_acquire_cmpxchg( 13751; GFX6-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: 13752; GFX6: ; %bb.0: ; %entry 13753; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 13754; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 13755; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 13756; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 13757; GFX6-NEXT: s_waitcnt lgkmcnt(0) 13758; GFX6-NEXT: s_mov_b32 s12, s5 13759; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13760; GFX6-NEXT: s_mov_b32 s10, 0x100f000 13761; GFX6-NEXT: s_mov_b32 s11, -1 13762; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13763; GFX6-NEXT: s_mov_b32 s5, s12 13764; 
GFX6-NEXT: s_mov_b32 s6, s11 13765; GFX6-NEXT: s_mov_b32 s7, s10 13766; GFX6-NEXT: v_mov_b32_e32 v0, s9 13767; GFX6-NEXT: v_mov_b32_e32 v2, s8 13768; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13769; GFX6-NEXT: v_mov_b32_e32 v1, v2 13770; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 13771; GFX6-NEXT: s_endpgm 13772; 13773; GFX7-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: 13774; GFX7: ; %bb.0: ; %entry 13775; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 13776; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 13777; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 13778; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 13779; GFX7-NEXT: s_mov_b64 s[10:11], 16 13780; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13781; GFX7-NEXT: s_mov_b32 s4, s8 13782; GFX7-NEXT: s_mov_b32 s5, s9 13783; GFX7-NEXT: s_mov_b32 s9, s10 13784; GFX7-NEXT: s_mov_b32 s8, s11 13785; GFX7-NEXT: s_add_u32 s4, s4, s9 13786; GFX7-NEXT: s_addc_u32 s8, s5, s8 13787; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 13788; GFX7-NEXT: s_mov_b32 s5, s8 13789; GFX7-NEXT: v_mov_b32_e32 v2, s7 13790; GFX7-NEXT: v_mov_b32_e32 v0, s6 13791; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13792; GFX7-NEXT: v_mov_b32_e32 v3, v0 13793; GFX7-NEXT: v_mov_b32_e32 v0, s4 13794; GFX7-NEXT: v_mov_b32_e32 v1, s5 13795; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 13796; GFX7-NEXT: s_endpgm 13797; 13798; GFX10-WGP-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: 13799; GFX10-WGP: ; %bb.0: ; %entry 13800; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 13801; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13802; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 13803; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 13804; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13805; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 13806; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 13807; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13808; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 
v3 13809; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13810; GFX10-WGP-NEXT: s_endpgm 13811; 13812; GFX10-CU-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: 13813; GFX10-CU: ; %bb.0: ; %entry 13814; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 13815; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13816; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 13817; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 13818; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13819; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 13820; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 13821; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13822; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 13823; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 13824; GFX10-CU-NEXT: s_endpgm 13825; 13826; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: 13827; SKIP-CACHE-INV: ; %bb.0: ; %entry 13828; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 13829; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 13830; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 13831; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 13832; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13833; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 13834; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 13835; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 13836; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 13837; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 13838; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 13839; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 13840; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 13841; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 13842; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 13843; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13844; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 13845; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 13846; SKIP-CACHE-INV-NEXT: s_endpgm 13847; 
13848; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: 13849; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13850; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13851; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13852; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13853; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13854; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13855; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13856; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13857; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13858; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13859; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13860; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13861; 13862; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: 13863; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13864; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13865; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13866; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 13867; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 13868; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13869; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 13870; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 13871; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13872; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13873; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 13874; GFX90A-TGSPLIT-NEXT: s_endpgm 13875; 13876; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: 13877; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13878; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13879; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13880; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13881; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13882; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13883; GFX940-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v2, s3 13884; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13885; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13886; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13887; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 13888; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13889; 13890; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: 13891; GFX940-TGSPLIT: ; %bb.0: ; %entry 13892; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 13893; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13894; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 13895; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 13896; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13897; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 13898; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 13899; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 13900; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 13901; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 13902; GFX940-TGSPLIT-NEXT: s_endpgm 13903; 13904; GFX11-WGP-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: 13905; GFX11-WGP: ; %bb.0: ; %entry 13906; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 13907; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13908; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13909; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13910; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13911; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 13912; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 13913; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13914; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 13915; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13916; GFX11-WGP-NEXT: s_endpgm 13917; 13918; GFX11-CU-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: 13919; GFX11-CU: ; %bb.0: ; %entry 13920; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 13921; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13922; 
GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13923; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13924; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13925; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 13926; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 13927; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13928; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 13929; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13930; GFX11-CU-NEXT: s_endpgm 13931; 13932; GFX12-WGP-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: 13933; GFX12-WGP: ; %bb.0: ; %entry 13934; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 13935; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13936; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 13937; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 13938; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13939; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 13940; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 13941; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13942; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 13943; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13944; GFX12-WGP-NEXT: s_endpgm 13945; 13946; GFX12-CU-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: 13947; GFX12-CU: ; %bb.0: ; %entry 13948; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 13949; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13950; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 13951; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 13952; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13953; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 13954; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 13955; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 13956; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 13957; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 13958; GFX12-CU-NEXT: s_endpgm 13959 ptr addrspace(1) %out, i32 %in, i32 %old) { 13960entry: 13961 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 13962 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in 
syncscope("wavefront-one-as") release acquire 13963 ret void 13964} 13965 13966define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_cmpxchg( 13967; GFX6-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: 13968; GFX6: ; %bb.0: ; %entry 13969; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 13970; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 13971; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 13972; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 13973; GFX6-NEXT: s_waitcnt lgkmcnt(0) 13974; GFX6-NEXT: s_mov_b32 s12, s5 13975; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 13976; GFX6-NEXT: s_mov_b32 s10, 0x100f000 13977; GFX6-NEXT: s_mov_b32 s11, -1 13978; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 13979; GFX6-NEXT: s_mov_b32 s5, s12 13980; GFX6-NEXT: s_mov_b32 s6, s11 13981; GFX6-NEXT: s_mov_b32 s7, s10 13982; GFX6-NEXT: v_mov_b32_e32 v0, s9 13983; GFX6-NEXT: v_mov_b32_e32 v2, s8 13984; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 13985; GFX6-NEXT: v_mov_b32_e32 v1, v2 13986; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 13987; GFX6-NEXT: s_endpgm 13988; 13989; GFX7-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: 13990; GFX7: ; %bb.0: ; %entry 13991; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 13992; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 13993; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 13994; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 13995; GFX7-NEXT: s_mov_b64 s[10:11], 16 13996; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13997; GFX7-NEXT: s_mov_b32 s4, s8 13998; GFX7-NEXT: s_mov_b32 s5, s9 13999; GFX7-NEXT: s_mov_b32 s9, s10 14000; GFX7-NEXT: s_mov_b32 s8, s11 14001; GFX7-NEXT: s_add_u32 s4, s4, s9 14002; GFX7-NEXT: s_addc_u32 s8, s5, s8 14003; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14004; GFX7-NEXT: s_mov_b32 s5, s8 14005; GFX7-NEXT: v_mov_b32_e32 v2, s7 14006; GFX7-NEXT: v_mov_b32_e32 v0, s6 14007; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def 
$vgpr2_vgpr3 killed $exec 14008; GFX7-NEXT: v_mov_b32_e32 v3, v0 14009; GFX7-NEXT: v_mov_b32_e32 v0, s4 14010; GFX7-NEXT: v_mov_b32_e32 v1, s5 14011; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14012; GFX7-NEXT: s_endpgm 14013; 14014; GFX10-WGP-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: 14015; GFX10-WGP: ; %bb.0: ; %entry 14016; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 14017; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14018; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 14019; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 14020; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14021; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 14022; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 14023; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14024; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 14025; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14026; GFX10-WGP-NEXT: s_endpgm 14027; 14028; GFX10-CU-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: 14029; GFX10-CU: ; %bb.0: ; %entry 14030; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 14031; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14032; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 14033; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 14034; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14035; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 14036; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 14037; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14038; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 14039; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14040; GFX10-CU-NEXT: s_endpgm 14041; 14042; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: 14043; SKIP-CACHE-INV: ; %bb.0: ; %entry 14044; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 14045; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 14046; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 14047; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 14048; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14049; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 14050; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14051; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 14052; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 14053; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14054; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 14055; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 14056; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 14057; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 14058; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 14059; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14060; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 14061; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 14062; SKIP-CACHE-INV-NEXT: s_endpgm 14063; 14064; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: 14065; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14066; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14067; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14068; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14069; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14070; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14071; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14072; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14073; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14074; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14075; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14076; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14077; 14078; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: 14079; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14080; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14081; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14082; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14083; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14084; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14085; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14086; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14087; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14088; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14089; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14090; GFX90A-TGSPLIT-NEXT: s_endpgm 14091; 14092; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: 14093; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14094; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14095; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14096; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14097; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14098; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14099; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14100; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14101; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14102; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14103; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14104; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14105; 14106; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: 14107; GFX940-TGSPLIT: ; %bb.0: ; %entry 14108; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14109; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14110; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14111; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14112; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14113; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14114; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14115; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14116; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14117; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14118; GFX940-TGSPLIT-NEXT: s_endpgm 14119; 14120; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: 14121; GFX11-WGP: ; 
%bb.0: ; %entry 14122; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 14123; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14124; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14125; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14126; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14127; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 14128; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 14129; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14130; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 14131; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14132; GFX11-WGP-NEXT: s_endpgm 14133; 14134; GFX11-CU-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: 14135; GFX11-CU: ; %bb.0: ; %entry 14136; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 14137; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14138; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14139; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14140; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14141; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 14142; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 14143; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14144; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 14145; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14146; GFX11-CU-NEXT: s_endpgm 14147; 14148; GFX12-WGP-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: 14149; GFX12-WGP: ; %bb.0: ; %entry 14150; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 14151; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14152; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14153; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14154; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14155; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 14156; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 14157; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14158; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 14159; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14160; GFX12-WGP-NEXT: s_endpgm 14161; 14162; GFX12-CU-LABEL: 
global_wavefront_one_as_acq_rel_acquire_cmpxchg: 14163; GFX12-CU: ; %bb.0: ; %entry 14164; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 14165; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14166; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14167; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14168; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14169; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 14170; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 14171; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14172; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 14173; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14174; GFX12-CU-NEXT: s_endpgm 14175 ptr addrspace(1) %out, i32 %in, i32 %old) { 14176entry: 14177 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 14178 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire 14179 ret void 14180} 14181 14182define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_cmpxchg( 14183; GFX6-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: 14184; GFX6: ; %bb.0: ; %entry 14185; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 14186; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 14187; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 14188; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 14189; GFX6-NEXT: s_waitcnt lgkmcnt(0) 14190; GFX6-NEXT: s_mov_b32 s12, s5 14191; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 14192; GFX6-NEXT: s_mov_b32 s10, 0x100f000 14193; GFX6-NEXT: s_mov_b32 s11, -1 14194; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 14195; GFX6-NEXT: s_mov_b32 s5, s12 14196; GFX6-NEXT: s_mov_b32 s6, s11 14197; GFX6-NEXT: s_mov_b32 s7, s10 14198; GFX6-NEXT: v_mov_b32_e32 v0, s9 14199; GFX6-NEXT: v_mov_b32_e32 v2, s8 14200; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14201; GFX6-NEXT: v_mov_b32_e32 v1, v2 14202; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 14203; GFX6-NEXT: s_endpgm 14204; 
14205; GFX7-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: 14206; GFX7: ; %bb.0: ; %entry 14207; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14208; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14209; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14210; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14211; GFX7-NEXT: s_mov_b64 s[10:11], 16 14212; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14213; GFX7-NEXT: s_mov_b32 s4, s8 14214; GFX7-NEXT: s_mov_b32 s5, s9 14215; GFX7-NEXT: s_mov_b32 s9, s10 14216; GFX7-NEXT: s_mov_b32 s8, s11 14217; GFX7-NEXT: s_add_u32 s4, s4, s9 14218; GFX7-NEXT: s_addc_u32 s8, s5, s8 14219; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14220; GFX7-NEXT: s_mov_b32 s5, s8 14221; GFX7-NEXT: v_mov_b32_e32 v2, s7 14222; GFX7-NEXT: v_mov_b32_e32 v0, s6 14223; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14224; GFX7-NEXT: v_mov_b32_e32 v3, v0 14225; GFX7-NEXT: v_mov_b32_e32 v0, s4 14226; GFX7-NEXT: v_mov_b32_e32 v1, s5 14227; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14228; GFX7-NEXT: s_endpgm 14229; 14230; GFX10-WGP-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: 14231; GFX10-WGP: ; %bb.0: ; %entry 14232; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 14233; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14234; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 14235; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 14236; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14237; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 14238; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 14239; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14240; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 14241; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14242; GFX10-WGP-NEXT: s_endpgm 14243; 14244; GFX10-CU-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: 14245; GFX10-CU: ; %bb.0: ; %entry 14246; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 14247; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14248; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 
0x8 14249; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 14250; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14251; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 14252; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 14253; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14254; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 14255; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14256; GFX10-CU-NEXT: s_endpgm 14257; 14258; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: 14259; SKIP-CACHE-INV: ; %bb.0: ; %entry 14260; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 14261; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 14262; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 14263; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 14264; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14265; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 14266; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14267; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 14268; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 14269; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14270; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 14271; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 14272; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 14273; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 14274; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 14275; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14276; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 14277; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 14278; SKIP-CACHE-INV-NEXT: s_endpgm 14279; 14280; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: 14281; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14282; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14283; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14284; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14285; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14286; 
GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14287; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14288; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14289; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14290; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14291; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14292; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14293; 14294; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: 14295; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14296; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14297; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14298; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14299; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14300; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14301; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14302; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14303; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14304; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14305; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14306; GFX90A-TGSPLIT-NEXT: s_endpgm 14307; 14308; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: 14309; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14310; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14311; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14312; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14313; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14314; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14315; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14316; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14317; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14318; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14319; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14320; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14321; 14322; GFX940-TGSPLIT-LABEL: 
global_wavefront_one_as_seq_cst_acquire_cmpxchg: 14323; GFX940-TGSPLIT: ; %bb.0: ; %entry 14324; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14325; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14326; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14327; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14328; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14329; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14330; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14331; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14332; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14333; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14334; GFX940-TGSPLIT-NEXT: s_endpgm 14335; 14336; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: 14337; GFX11-WGP: ; %bb.0: ; %entry 14338; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 14339; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14340; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14341; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14342; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14343; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 14344; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 14345; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14346; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 14347; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14348; GFX11-WGP-NEXT: s_endpgm 14349; 14350; GFX11-CU-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: 14351; GFX11-CU: ; %bb.0: ; %entry 14352; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 14353; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14354; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14355; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14356; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14357; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 14358; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 14359; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14360; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 14361; GFX11-CU-NEXT: 
global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14362; GFX11-CU-NEXT: s_endpgm 14363; 14364; GFX12-WGP-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: 14365; GFX12-WGP: ; %bb.0: ; %entry 14366; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 14367; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14368; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14369; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14370; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14371; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 14372; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 14373; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14374; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 14375; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14376; GFX12-WGP-NEXT: s_endpgm 14377; 14378; GFX12-CU-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: 14379; GFX12-CU: ; %bb.0: ; %entry 14380; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 14381; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14382; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14383; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14384; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14385; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 14386; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 14387; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14388; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 14389; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14390; GFX12-CU-NEXT: s_endpgm 14391 ptr addrspace(1) %out, i32 %in, i32 %old) { 14392entry: 14393 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 14394 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire 14395 ret void 14396} 14397 14398define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_cmpxchg( 14399; GFX6-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: 14400; GFX6: ; %bb.0: ; %entry 14401; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 14402; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 14403; 
GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 14404; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 14405; GFX6-NEXT: s_waitcnt lgkmcnt(0) 14406; GFX6-NEXT: s_mov_b32 s12, s5 14407; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 14408; GFX6-NEXT: s_mov_b32 s10, 0x100f000 14409; GFX6-NEXT: s_mov_b32 s11, -1 14410; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 14411; GFX6-NEXT: s_mov_b32 s5, s12 14412; GFX6-NEXT: s_mov_b32 s6, s11 14413; GFX6-NEXT: s_mov_b32 s7, s10 14414; GFX6-NEXT: v_mov_b32_e32 v0, s9 14415; GFX6-NEXT: v_mov_b32_e32 v2, s8 14416; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14417; GFX6-NEXT: v_mov_b32_e32 v1, v2 14418; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 14419; GFX6-NEXT: s_endpgm 14420; 14421; GFX7-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: 14422; GFX7: ; %bb.0: ; %entry 14423; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14424; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14425; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14426; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14427; GFX7-NEXT: s_mov_b64 s[10:11], 16 14428; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14429; GFX7-NEXT: s_mov_b32 s4, s8 14430; GFX7-NEXT: s_mov_b32 s5, s9 14431; GFX7-NEXT: s_mov_b32 s9, s10 14432; GFX7-NEXT: s_mov_b32 s8, s11 14433; GFX7-NEXT: s_add_u32 s4, s4, s9 14434; GFX7-NEXT: s_addc_u32 s8, s5, s8 14435; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14436; GFX7-NEXT: s_mov_b32 s5, s8 14437; GFX7-NEXT: v_mov_b32_e32 v2, s7 14438; GFX7-NEXT: v_mov_b32_e32 v0, s6 14439; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14440; GFX7-NEXT: v_mov_b32_e32 v3, v0 14441; GFX7-NEXT: v_mov_b32_e32 v0, s4 14442; GFX7-NEXT: v_mov_b32_e32 v1, s5 14443; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14444; GFX7-NEXT: s_endpgm 14445; 14446; GFX10-WGP-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: 14447; GFX10-WGP: ; %bb.0: ; %entry 14448; GFX10-WGP-NEXT: 
v_mov_b32_e32 v0, 0 14449; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14450; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 14451; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 14452; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14453; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 14454; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 14455; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14456; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 14457; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14458; GFX10-WGP-NEXT: s_endpgm 14459; 14460; GFX10-CU-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: 14461; GFX10-CU: ; %bb.0: ; %entry 14462; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 14463; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14464; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 14465; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 14466; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14467; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 14468; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 14469; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14470; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 14471; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14472; GFX10-CU-NEXT: s_endpgm 14473; 14474; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: 14475; SKIP-CACHE-INV: ; %bb.0: ; %entry 14476; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 14477; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 14478; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 14479; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 14480; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14481; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 14482; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14483; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 14484; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 14485; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14486; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 14487; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 14488; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 14489; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 14490; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 14491; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14492; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 14493; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 14494; SKIP-CACHE-INV-NEXT: s_endpgm 14495; 14496; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: 14497; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14498; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14499; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14500; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14501; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14502; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14503; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14504; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14505; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14506; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14507; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14508; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14509; 14510; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: 14511; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14512; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14513; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14514; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14515; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14516; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14517; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14518; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14519; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14520; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14521; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14522; GFX90A-TGSPLIT-NEXT: s_endpgm 14523; 
14524; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: 14525; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14526; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14527; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14528; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14529; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14530; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14531; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14532; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14533; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14534; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14535; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14536; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14537; 14538; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: 14539; GFX940-TGSPLIT: ; %bb.0: ; %entry 14540; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14541; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14542; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14543; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14544; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14545; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14546; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14547; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14548; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14549; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14550; GFX940-TGSPLIT-NEXT: s_endpgm 14551; 14552; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: 14553; GFX11-WGP: ; %bb.0: ; %entry 14554; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 14555; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14556; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14557; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14558; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14559; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 14560; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 14561; 
GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14562; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 14563; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14564; GFX11-WGP-NEXT: s_endpgm 14565; 14566; GFX11-CU-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: 14567; GFX11-CU: ; %bb.0: ; %entry 14568; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 14569; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14570; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14571; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14572; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14573; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 14574; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 14575; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14576; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 14577; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14578; GFX11-CU-NEXT: s_endpgm 14579; 14580; GFX12-WGP-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: 14581; GFX12-WGP: ; %bb.0: ; %entry 14582; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 14583; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14584; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14585; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14586; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14587; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 14588; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 14589; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14590; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 14591; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14592; GFX12-WGP-NEXT: s_endpgm 14593; 14594; GFX12-CU-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: 14595; GFX12-CU: ; %bb.0: ; %entry 14596; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 14597; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14598; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14599; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14600; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14601; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 14602; 
GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 14603; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14604; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 14605; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14606; GFX12-CU-NEXT: s_endpgm 14607 ptr addrspace(1) %out, i32 %in, i32 %old) { 14608entry: 14609 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 14610 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst 14611 ret void 14612} 14613 14614define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_cmpxchg( 14615; GFX6-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: 14616; GFX6: ; %bb.0: ; %entry 14617; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 14618; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 14619; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 14620; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 14621; GFX6-NEXT: s_waitcnt lgkmcnt(0) 14622; GFX6-NEXT: s_mov_b32 s12, s5 14623; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 14624; GFX6-NEXT: s_mov_b32 s10, 0x100f000 14625; GFX6-NEXT: s_mov_b32 s11, -1 14626; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 14627; GFX6-NEXT: s_mov_b32 s5, s12 14628; GFX6-NEXT: s_mov_b32 s6, s11 14629; GFX6-NEXT: s_mov_b32 s7, s10 14630; GFX6-NEXT: v_mov_b32_e32 v0, s9 14631; GFX6-NEXT: v_mov_b32_e32 v2, s8 14632; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14633; GFX6-NEXT: v_mov_b32_e32 v1, v2 14634; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 14635; GFX6-NEXT: s_endpgm 14636; 14637; GFX7-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: 14638; GFX7: ; %bb.0: ; %entry 14639; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14640; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14641; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14642; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14643; GFX7-NEXT: s_mov_b64 s[10:11], 16 14644; GFX7-NEXT: s_waitcnt lgkmcnt(0) 
14645; GFX7-NEXT: s_mov_b32 s4, s8 14646; GFX7-NEXT: s_mov_b32 s5, s9 14647; GFX7-NEXT: s_mov_b32 s9, s10 14648; GFX7-NEXT: s_mov_b32 s8, s11 14649; GFX7-NEXT: s_add_u32 s4, s4, s9 14650; GFX7-NEXT: s_addc_u32 s8, s5, s8 14651; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14652; GFX7-NEXT: s_mov_b32 s5, s8 14653; GFX7-NEXT: v_mov_b32_e32 v2, s7 14654; GFX7-NEXT: v_mov_b32_e32 v0, s6 14655; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14656; GFX7-NEXT: v_mov_b32_e32 v3, v0 14657; GFX7-NEXT: v_mov_b32_e32 v0, s4 14658; GFX7-NEXT: v_mov_b32_e32 v1, s5 14659; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14660; GFX7-NEXT: s_endpgm 14661; 14662; GFX10-WGP-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: 14663; GFX10-WGP: ; %bb.0: ; %entry 14664; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 14665; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14666; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 14667; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 14668; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14669; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 14670; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 14671; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14672; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 14673; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14674; GFX10-WGP-NEXT: s_endpgm 14675; 14676; GFX10-CU-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: 14677; GFX10-CU: ; %bb.0: ; %entry 14678; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 14679; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14680; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 14681; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 14682; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14683; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 14684; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 14685; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14686; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 14687; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], 
s[4:5] offset:16 14688; GFX10-CU-NEXT: s_endpgm 14689; 14690; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: 14691; SKIP-CACHE-INV: ; %bb.0: ; %entry 14692; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 14693; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 14694; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 14695; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 14696; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14697; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 14698; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14699; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 14700; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 14701; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14702; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 14703; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 14704; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 14705; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 14706; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 14707; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14708; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 14709; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 14710; SKIP-CACHE-INV-NEXT: s_endpgm 14711; 14712; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: 14713; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14714; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14715; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14716; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14717; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14718; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14719; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14720; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14721; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14722; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14723; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] 
offset:16 14724; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14725; 14726; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: 14727; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14728; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14729; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14730; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14731; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14732; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14733; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14734; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14735; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14736; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14737; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14738; GFX90A-TGSPLIT-NEXT: s_endpgm 14739; 14740; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: 14741; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14742; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14743; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14744; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14745; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14746; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14747; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14748; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14749; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14750; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14751; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14752; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14753; 14754; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: 14755; GFX940-TGSPLIT: ; %bb.0: ; %entry 14756; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14757; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14758; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14759; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14760; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 
14761; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14762; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14763; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14764; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14765; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14766; GFX940-TGSPLIT-NEXT: s_endpgm 14767; 14768; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: 14769; GFX11-WGP: ; %bb.0: ; %entry 14770; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 14771; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14772; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14773; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14774; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14775; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 14776; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 14777; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14778; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 14779; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14780; GFX11-WGP-NEXT: s_endpgm 14781; 14782; GFX11-CU-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: 14783; GFX11-CU: ; %bb.0: ; %entry 14784; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 14785; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14786; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14787; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14788; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14789; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 14790; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 14791; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14792; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 14793; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14794; GFX11-CU-NEXT: s_endpgm 14795; 14796; GFX12-WGP-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: 14797; GFX12-WGP: ; %bb.0: ; %entry 14798; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 14799; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14800; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14801; GFX12-WGP-NEXT: 
s_load_b32 s2, s[4:5], 0xc 14802; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14803; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 14804; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 14805; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14806; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 14807; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14808; GFX12-WGP-NEXT: s_endpgm 14809; 14810; GFX12-CU-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: 14811; GFX12-CU: ; %bb.0: ; %entry 14812; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 14813; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14814; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14815; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14816; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14817; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 14818; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 14819; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14820; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 14821; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14822; GFX12-CU-NEXT: s_endpgm 14823 ptr addrspace(1) %out, i32 %in, i32 %old) { 14824entry: 14825 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 14826 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst 14827 ret void 14828} 14829 14830define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_cmpxchg( 14831; GFX6-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: 14832; GFX6: ; %bb.0: ; %entry 14833; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 14834; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 14835; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 14836; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 14837; GFX6-NEXT: s_waitcnt lgkmcnt(0) 14838; GFX6-NEXT: s_mov_b32 s12, s5 14839; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 14840; GFX6-NEXT: s_mov_b32 s10, 0x100f000 14841; GFX6-NEXT: s_mov_b32 s11, -1 14842; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def 
$sgpr4_sgpr5_sgpr6_sgpr7 14843; GFX6-NEXT: s_mov_b32 s5, s12 14844; GFX6-NEXT: s_mov_b32 s6, s11 14845; GFX6-NEXT: s_mov_b32 s7, s10 14846; GFX6-NEXT: v_mov_b32_e32 v0, s9 14847; GFX6-NEXT: v_mov_b32_e32 v2, s8 14848; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14849; GFX6-NEXT: v_mov_b32_e32 v1, v2 14850; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 14851; GFX6-NEXT: s_endpgm 14852; 14853; GFX7-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: 14854; GFX7: ; %bb.0: ; %entry 14855; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14856; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14857; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14858; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14859; GFX7-NEXT: s_mov_b64 s[10:11], 16 14860; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14861; GFX7-NEXT: s_mov_b32 s4, s8 14862; GFX7-NEXT: s_mov_b32 s5, s9 14863; GFX7-NEXT: s_mov_b32 s9, s10 14864; GFX7-NEXT: s_mov_b32 s8, s11 14865; GFX7-NEXT: s_add_u32 s4, s4, s9 14866; GFX7-NEXT: s_addc_u32 s8, s5, s8 14867; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14868; GFX7-NEXT: s_mov_b32 s5, s8 14869; GFX7-NEXT: v_mov_b32_e32 v2, s7 14870; GFX7-NEXT: v_mov_b32_e32 v0, s6 14871; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14872; GFX7-NEXT: v_mov_b32_e32 v3, v0 14873; GFX7-NEXT: v_mov_b32_e32 v0, s4 14874; GFX7-NEXT: v_mov_b32_e32 v1, s5 14875; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14876; GFX7-NEXT: s_endpgm 14877; 14878; GFX10-WGP-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: 14879; GFX10-WGP: ; %bb.0: ; %entry 14880; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 14881; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14882; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 14883; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 14884; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14885; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 14886; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 14887; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def 
$vgpr1_vgpr2 killed $exec 14888; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 14889; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14890; GFX10-WGP-NEXT: s_endpgm 14891; 14892; GFX10-CU-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: 14893; GFX10-CU: ; %bb.0: ; %entry 14894; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 14895; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14896; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 14897; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 14898; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14899; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 14900; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 14901; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14902; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 14903; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 14904; GFX10-CU-NEXT: s_endpgm 14905; 14906; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: 14907; SKIP-CACHE-INV: ; %bb.0: ; %entry 14908; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 14909; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 14910; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 14911; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 14912; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14913; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 14914; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 14915; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 14916; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 14917; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 14918; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 14919; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 14920; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 14921; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 14922; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 14923; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 14924; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 14925; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], 
off, s[0:3], 0 offset:16 14926; SKIP-CACHE-INV-NEXT: s_endpgm 14927; 14928; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: 14929; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14930; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14931; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14932; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14933; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14934; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14935; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14936; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14937; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14938; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14939; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14940; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14941; 14942; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: 14943; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14944; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14945; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14946; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14947; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14948; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14949; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14950; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 14951; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14952; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14953; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 14954; GFX90A-TGSPLIT-NEXT: s_endpgm 14955; 14956; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: 14957; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14958; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14959; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14960; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14961; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14962; 
GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14963; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14964; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14965; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14966; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14967; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14968; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14969; 14970; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: 14971; GFX940-TGSPLIT: ; %bb.0: ; %entry 14972; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 14973; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14974; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14975; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14976; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14977; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14978; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 14979; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14980; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 14981; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 14982; GFX940-TGSPLIT-NEXT: s_endpgm 14983; 14984; GFX11-WGP-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: 14985; GFX11-WGP: ; %bb.0: ; %entry 14986; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 14987; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14988; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14989; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14990; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14991; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 14992; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 14993; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 14994; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 14995; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 14996; GFX11-WGP-NEXT: s_endpgm 14997; 14998; GFX11-CU-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: 14999; GFX11-CU: ; %bb.0: ; %entry 15000; GFX11-CU-NEXT: 
v_mov_b32_e32 v0, 0 15001; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15002; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15003; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15004; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15005; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 15006; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 15007; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15008; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 15009; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15010; GFX11-CU-NEXT: s_endpgm 15011; 15012; GFX12-WGP-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: 15013; GFX12-WGP: ; %bb.0: ; %entry 15014; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 15015; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15016; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15017; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15018; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15019; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 15020; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 15021; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15022; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 15023; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15024; GFX12-WGP-NEXT: s_endpgm 15025; 15026; GFX12-CU-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: 15027; GFX12-CU: ; %bb.0: ; %entry 15028; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 15029; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15030; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15031; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15032; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15033; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 15034; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 15035; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15036; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 15037; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15038; GFX12-CU-NEXT: s_endpgm 15039 ptr addrspace(1) %out, i32 %in, i32 %old) { 15040entry: 15041 %gep = getelementptr i32, ptr addrspace(1) 
%out, i32 4 15042 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst 15043 ret void 15044} 15045 15046define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_cmpxchg( 15047; GFX6-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: 15048; GFX6: ; %bb.0: ; %entry 15049; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 15050; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15051; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 15052; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 15053; GFX6-NEXT: s_waitcnt lgkmcnt(0) 15054; GFX6-NEXT: s_mov_b32 s12, s5 15055; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 15056; GFX6-NEXT: s_mov_b32 s10, 0x100f000 15057; GFX6-NEXT: s_mov_b32 s11, -1 15058; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 15059; GFX6-NEXT: s_mov_b32 s5, s12 15060; GFX6-NEXT: s_mov_b32 s6, s11 15061; GFX6-NEXT: s_mov_b32 s7, s10 15062; GFX6-NEXT: v_mov_b32_e32 v0, s9 15063; GFX6-NEXT: v_mov_b32_e32 v2, s8 15064; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15065; GFX6-NEXT: v_mov_b32_e32 v1, v2 15066; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 15067; GFX6-NEXT: s_endpgm 15068; 15069; GFX7-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: 15070; GFX7: ; %bb.0: ; %entry 15071; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15072; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15073; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15074; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15075; GFX7-NEXT: s_mov_b64 s[10:11], 16 15076; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15077; GFX7-NEXT: s_mov_b32 s4, s8 15078; GFX7-NEXT: s_mov_b32 s5, s9 15079; GFX7-NEXT: s_mov_b32 s9, s10 15080; GFX7-NEXT: s_mov_b32 s8, s11 15081; GFX7-NEXT: s_add_u32 s4, s4, s9 15082; GFX7-NEXT: s_addc_u32 s8, s5, s8 15083; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15084; GFX7-NEXT: s_mov_b32 s5, s8 15085; GFX7-NEXT: v_mov_b32_e32 v2, s7 15086; GFX7-NEXT: 
v_mov_b32_e32 v0, s6 15087; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15088; GFX7-NEXT: v_mov_b32_e32 v3, v0 15089; GFX7-NEXT: v_mov_b32_e32 v0, s4 15090; GFX7-NEXT: v_mov_b32_e32 v1, s5 15091; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15092; GFX7-NEXT: s_endpgm 15093; 15094; GFX10-WGP-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: 15095; GFX10-WGP: ; %bb.0: ; %entry 15096; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 15097; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15098; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 15099; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 15100; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15101; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 15102; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 15103; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15104; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 15105; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15106; GFX10-WGP-NEXT: s_endpgm 15107; 15108; GFX10-CU-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: 15109; GFX10-CU: ; %bb.0: ; %entry 15110; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 15111; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15112; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 15113; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 15114; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15115; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 15116; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 15117; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15118; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 15119; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15120; GFX10-CU-NEXT: s_endpgm 15121; 15122; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: 15123; SKIP-CACHE-INV: ; %bb.0: ; %entry 15124; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 15125; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 15126; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 15127; SKIP-CACHE-INV-NEXT: 
s_load_dword s4, s[2:3], 0x3 15128; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15129; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 15130; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 15131; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 15132; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 15133; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 15134; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 15135; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 15136; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 15137; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 15138; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 15139; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15140; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 15141; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 15142; SKIP-CACHE-INV-NEXT: s_endpgm 15143; 15144; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: 15145; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15146; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15147; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15148; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15149; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15150; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15151; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15152; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15153; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15154; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15155; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15156; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 15157; 15158; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: 15159; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15160; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15161; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15162; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15163; GFX90A-TGSPLIT-NEXT: 
s_load_dword s6, s[8:9], 0xc 15164; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15165; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15166; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15167; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15168; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15169; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15170; GFX90A-TGSPLIT-NEXT: s_endpgm 15171; 15172; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: 15173; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15174; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15175; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15176; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15177; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15178; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15179; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15180; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15181; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15182; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15183; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 15184; GFX940-NOTTGSPLIT-NEXT: s_endpgm 15185; 15186; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: 15187; GFX940-TGSPLIT: ; %bb.0: ; %entry 15188; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15189; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15190; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15191; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15192; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15193; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15194; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15195; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15196; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15197; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 15198; GFX940-TGSPLIT-NEXT: s_endpgm 15199; 15200; 
GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: 15201; GFX11-WGP: ; %bb.0: ; %entry 15202; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 15203; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15204; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15205; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15206; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15207; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 15208; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 15209; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15210; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 15211; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15212; GFX11-WGP-NEXT: s_endpgm 15213; 15214; GFX11-CU-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: 15215; GFX11-CU: ; %bb.0: ; %entry 15216; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 15217; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15218; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15219; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15220; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15221; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 15222; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 15223; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15224; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 15225; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15226; GFX11-CU-NEXT: s_endpgm 15227; 15228; GFX12-WGP-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: 15229; GFX12-WGP: ; %bb.0: ; %entry 15230; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 15231; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15232; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15233; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15234; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15235; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 15236; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 15237; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15238; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 15239; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15240; 
GFX12-WGP-NEXT: s_endpgm 15241; 15242; GFX12-CU-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: 15243; GFX12-CU: ; %bb.0: ; %entry 15244; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 15245; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15246; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15247; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15248; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15249; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 15250; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 15251; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15252; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 15253; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15254; GFX12-CU-NEXT: s_endpgm 15255 ptr addrspace(1) %out, i32 %in, i32 %old) { 15256entry: 15257 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 15258 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst 15259 ret void 15260} 15261 15262define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_cmpxchg( 15263; GFX6-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: 15264; GFX6: ; %bb.0: ; %entry 15265; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 15266; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15267; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 15268; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 15269; GFX6-NEXT: s_waitcnt lgkmcnt(0) 15270; GFX6-NEXT: s_mov_b32 s12, s5 15271; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 15272; GFX6-NEXT: s_mov_b32 s10, 0x100f000 15273; GFX6-NEXT: s_mov_b32 s11, -1 15274; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 15275; GFX6-NEXT: s_mov_b32 s5, s12 15276; GFX6-NEXT: s_mov_b32 s6, s11 15277; GFX6-NEXT: s_mov_b32 s7, s10 15278; GFX6-NEXT: v_mov_b32_e32 v0, s9 15279; GFX6-NEXT: v_mov_b32_e32 v2, s8 15280; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15281; GFX6-NEXT: v_mov_b32_e32 v1, v2 15282; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, 
s[4:7], 0 offset:16 15283; GFX6-NEXT: s_endpgm 15284; 15285; GFX7-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: 15286; GFX7: ; %bb.0: ; %entry 15287; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15288; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15289; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15290; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15291; GFX7-NEXT: s_mov_b64 s[10:11], 16 15292; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15293; GFX7-NEXT: s_mov_b32 s4, s8 15294; GFX7-NEXT: s_mov_b32 s5, s9 15295; GFX7-NEXT: s_mov_b32 s9, s10 15296; GFX7-NEXT: s_mov_b32 s8, s11 15297; GFX7-NEXT: s_add_u32 s4, s4, s9 15298; GFX7-NEXT: s_addc_u32 s8, s5, s8 15299; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15300; GFX7-NEXT: s_mov_b32 s5, s8 15301; GFX7-NEXT: v_mov_b32_e32 v2, s7 15302; GFX7-NEXT: v_mov_b32_e32 v0, s6 15303; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15304; GFX7-NEXT: v_mov_b32_e32 v3, v0 15305; GFX7-NEXT: v_mov_b32_e32 v0, s4 15306; GFX7-NEXT: v_mov_b32_e32 v1, s5 15307; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15308; GFX7-NEXT: s_endpgm 15309; 15310; GFX10-WGP-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: 15311; GFX10-WGP: ; %bb.0: ; %entry 15312; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 15313; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15314; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 15315; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 15316; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15317; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 15318; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 15319; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15320; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 15321; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15322; GFX10-WGP-NEXT: s_endpgm 15323; 15324; GFX10-CU-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: 15325; GFX10-CU: ; %bb.0: ; %entry 15326; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 15327; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], 
s[8:9], 0x0 15328; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 15329; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 15330; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15331; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 15332; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 15333; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15334; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 15335; GFX10-CU-NEXT: global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16 15336; GFX10-CU-NEXT: s_endpgm 15337; 15338; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: 15339; SKIP-CACHE-INV: ; %bb.0: ; %entry 15340; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 15341; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 15342; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 15343; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 15344; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15345; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 15346; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 15347; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 15348; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 15349; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 15350; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 15351; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 15352; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 15353; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 15354; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 15355; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15356; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 15357; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 15358; SKIP-CACHE-INV-NEXT: s_endpgm 15359; 15360; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: 15361; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15362; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15363; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15364; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15365; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15366; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15367; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15368; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15369; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15370; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15371; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15372; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 15373; 15374; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: 15375; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15376; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15377; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15378; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15379; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15380; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15381; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15382; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15383; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15384; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15385; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 15386; GFX90A-TGSPLIT-NEXT: s_endpgm 15387; 15388; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: 15389; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15390; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15391; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15392; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15393; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15394; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15395; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15396; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15397; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15398; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15399; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 15400; 
GFX940-NOTTGSPLIT-NEXT: s_endpgm 15401; 15402; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: 15403; GFX940-TGSPLIT: ; %bb.0: ; %entry 15404; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15405; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15406; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15407; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15408; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15409; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15410; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15411; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15412; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15413; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 15414; GFX940-TGSPLIT-NEXT: s_endpgm 15415; 15416; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: 15417; GFX11-WGP: ; %bb.0: ; %entry 15418; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 15419; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15420; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15421; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15422; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15423; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 15424; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 15425; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15426; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 15427; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15428; GFX11-WGP-NEXT: s_endpgm 15429; 15430; GFX11-CU-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: 15431; GFX11-CU: ; %bb.0: ; %entry 15432; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 15433; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15434; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15435; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15436; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15437; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 15438; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 15439; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15440; 
GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 15441; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15442; GFX11-CU-NEXT: s_endpgm 15443; 15444; GFX12-WGP-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: 15445; GFX12-WGP: ; %bb.0: ; %entry 15446; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 15447; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15448; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15449; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15450; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15451; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 15452; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 15453; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15454; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 15455; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15456; GFX12-WGP-NEXT: s_endpgm 15457; 15458; GFX12-CU-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: 15459; GFX12-CU: ; %bb.0: ; %entry 15460; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 15461; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15462; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15463; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15464; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15465; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 15466; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 15467; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15468; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 15469; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 15470; GFX12-CU-NEXT: s_endpgm 15471 ptr addrspace(1) %out, i32 %in, i32 %old) { 15472entry: 15473 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 15474 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst 15475 ret void 15476} 15477 15478define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg( 15479; GFX6-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: 15480; GFX6: ; %bb.0: ; %entry 15481; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 
15482; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15483; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 15484; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 15485; GFX6-NEXT: s_waitcnt lgkmcnt(0) 15486; GFX6-NEXT: s_mov_b32 s12, s5 15487; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 15488; GFX6-NEXT: s_mov_b32 s10, 0x100f000 15489; GFX6-NEXT: s_mov_b32 s11, -1 15490; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 15491; GFX6-NEXT: s_mov_b32 s5, s12 15492; GFX6-NEXT: s_mov_b32 s6, s11 15493; GFX6-NEXT: s_mov_b32 s7, s10 15494; GFX6-NEXT: v_mov_b32_e32 v0, s9 15495; GFX6-NEXT: v_mov_b32_e32 v2, s8 15496; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15497; GFX6-NEXT: v_mov_b32_e32 v1, v2 15498; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 15499; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 15500; GFX6-NEXT: s_waitcnt vmcnt(0) 15501; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 15502; GFX6-NEXT: s_endpgm 15503; 15504; GFX7-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: 15505; GFX7: ; %bb.0: ; %entry 15506; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 15507; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15508; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 15509; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 15510; GFX7-NEXT: s_mov_b64 s[12:13], 16 15511; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15512; GFX7-NEXT: s_mov_b32 s6, s4 15513; GFX7-NEXT: s_mov_b32 s7, s5 15514; GFX7-NEXT: s_mov_b32 s11, s12 15515; GFX7-NEXT: s_mov_b32 s10, s13 15516; GFX7-NEXT: s_add_u32 s6, s6, s11 15517; GFX7-NEXT: s_addc_u32 s10, s7, s10 15518; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 15519; GFX7-NEXT: s_mov_b32 s7, s10 15520; GFX7-NEXT: v_mov_b32_e32 v2, s9 15521; GFX7-NEXT: v_mov_b32_e32 v0, s8 15522; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15523; GFX7-NEXT: v_mov_b32_e32 v3, v0 15524; GFX7-NEXT: v_mov_b32_e32 v0, s6 15525; 
GFX7-NEXT: v_mov_b32_e32 v1, s7 15526; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 15527; GFX7-NEXT: v_mov_b32_e32 v0, s4 15528; GFX7-NEXT: v_mov_b32_e32 v1, s5 15529; GFX7-NEXT: s_waitcnt vmcnt(0) 15530; GFX7-NEXT: flat_store_dword v[0:1], v2 15531; GFX7-NEXT: s_endpgm 15532; 15533; GFX10-WGP-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: 15534; GFX10-WGP: ; %bb.0: ; %entry 15535; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 15536; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15537; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 15538; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 15539; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15540; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 15541; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 15542; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15543; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 15544; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 15545; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 15546; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 15547; GFX10-WGP-NEXT: s_endpgm 15548; 15549; GFX10-CU-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: 15550; GFX10-CU: ; %bb.0: ; %entry 15551; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 15552; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15553; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 15554; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 15555; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15556; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 15557; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 15558; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15559; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 15560; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 15561; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 15562; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 15563; GFX10-CU-NEXT: s_endpgm 15564; 15565; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: 15566; SKIP-CACHE-INV: ; %bb.0: 
; %entry 15567; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 15568; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 15569; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 15570; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 15571; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15572; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 15573; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 15574; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 15575; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 15576; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 15577; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 15578; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 15579; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 15580; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 15581; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 15582; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15583; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 15584; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 15585; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 15586; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15587; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 15588; SKIP-CACHE-INV-NEXT: s_endpgm 15589; 15590; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: 15591; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15592; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15593; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15594; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15595; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15596; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15597; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15598; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15599; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15600; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15601; 
GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 15602; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15603; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 15604; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 15605; 15606; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: 15607; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15608; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15609; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15610; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15611; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15612; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15613; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15614; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15615; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15616; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15617; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 15618; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15619; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 15620; GFX90A-TGSPLIT-NEXT: s_endpgm 15621; 15622; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: 15623; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15624; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15625; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15626; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15627; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15628; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15629; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15630; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15631; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15632; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15633; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 15634; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15635; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, 
s[0:1] sc0 sc1 15636; GFX940-NOTTGSPLIT-NEXT: s_endpgm 15637; 15638; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: 15639; GFX940-TGSPLIT: ; %bb.0: ; %entry 15640; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15641; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15642; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15643; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15644; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15645; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15646; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15647; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15648; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15649; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 15650; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15651; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 15652; GFX940-TGSPLIT-NEXT: s_endpgm 15653; 15654; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: 15655; GFX11-WGP: ; %bb.0: ; %entry 15656; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 15657; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15658; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15659; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15660; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15661; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 15662; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 15663; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15664; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 15665; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 15666; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 15667; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 15668; GFX11-WGP-NEXT: s_endpgm 15669; 15670; GFX11-CU-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: 15671; GFX11-CU: ; %bb.0: ; %entry 15672; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 15673; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15674; GFX11-CU-NEXT: s_load_b32 s3, 
s[4:5], 0x8 15675; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15676; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15677; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 15678; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 15679; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15680; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 15681; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 15682; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 15683; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 15684; GFX11-CU-NEXT: s_endpgm 15685; 15686; GFX12-WGP-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: 15687; GFX12-WGP: ; %bb.0: ; %entry 15688; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 15689; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15690; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15691; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15692; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15693; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 15694; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 15695; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15696; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 15697; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 15698; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 15699; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 15700; GFX12-WGP-NEXT: s_endpgm 15701; 15702; GFX12-CU-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: 15703; GFX12-CU: ; %bb.0: ; %entry 15704; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 15705; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15706; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15707; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15708; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15709; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 15710; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 15711; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15712; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 15713; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 
th:TH_ATOMIC_RETURN 15714; GFX12-CU-NEXT: s_wait_loadcnt 0x0 15715; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 15716; GFX12-CU-NEXT: s_endpgm 15717 ptr addrspace(1) %out, i32 %in, i32 %old) { 15718entry: 15719 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 15720 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic 15721 %val0 = extractvalue { i32, i1 } %val, 0 15722 store i32 %val0, ptr addrspace(1) %out, align 4 15723 ret void 15724} 15725 15726define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_ret_cmpxchg( 15727; GFX6-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: 15728; GFX6: ; %bb.0: ; %entry 15729; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 15730; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15731; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 15732; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 15733; GFX6-NEXT: s_waitcnt lgkmcnt(0) 15734; GFX6-NEXT: s_mov_b32 s12, s5 15735; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 15736; GFX6-NEXT: s_mov_b32 s10, 0x100f000 15737; GFX6-NEXT: s_mov_b32 s11, -1 15738; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 15739; GFX6-NEXT: s_mov_b32 s5, s12 15740; GFX6-NEXT: s_mov_b32 s6, s11 15741; GFX6-NEXT: s_mov_b32 s7, s10 15742; GFX6-NEXT: v_mov_b32_e32 v0, s9 15743; GFX6-NEXT: v_mov_b32_e32 v2, s8 15744; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15745; GFX6-NEXT: v_mov_b32_e32 v1, v2 15746; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 15747; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 15748; GFX6-NEXT: s_waitcnt vmcnt(0) 15749; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 15750; GFX6-NEXT: s_endpgm 15751; 15752; GFX7-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: 15753; GFX7: ; %bb.0: ; %entry 15754; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 15755; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 
0x0 15756; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 15757; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 15758; GFX7-NEXT: s_mov_b64 s[12:13], 16 15759; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15760; GFX7-NEXT: s_mov_b32 s6, s4 15761; GFX7-NEXT: s_mov_b32 s7, s5 15762; GFX7-NEXT: s_mov_b32 s11, s12 15763; GFX7-NEXT: s_mov_b32 s10, s13 15764; GFX7-NEXT: s_add_u32 s6, s6, s11 15765; GFX7-NEXT: s_addc_u32 s10, s7, s10 15766; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 15767; GFX7-NEXT: s_mov_b32 s7, s10 15768; GFX7-NEXT: v_mov_b32_e32 v2, s9 15769; GFX7-NEXT: v_mov_b32_e32 v0, s8 15770; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15771; GFX7-NEXT: v_mov_b32_e32 v3, v0 15772; GFX7-NEXT: v_mov_b32_e32 v0, s6 15773; GFX7-NEXT: v_mov_b32_e32 v1, s7 15774; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 15775; GFX7-NEXT: v_mov_b32_e32 v0, s4 15776; GFX7-NEXT: v_mov_b32_e32 v1, s5 15777; GFX7-NEXT: s_waitcnt vmcnt(0) 15778; GFX7-NEXT: flat_store_dword v[0:1], v2 15779; GFX7-NEXT: s_endpgm 15780; 15781; GFX10-WGP-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: 15782; GFX10-WGP: ; %bb.0: ; %entry 15783; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 15784; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15785; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 15786; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 15787; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15788; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 15789; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 15790; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15791; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 15792; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 15793; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 15794; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 15795; GFX10-WGP-NEXT: s_endpgm 15796; 15797; GFX10-CU-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: 15798; GFX10-CU: ; %bb.0: ; %entry 15799; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 
15800; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15801; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 15802; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 15803; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15804; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 15805; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 15806; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15807; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 15808; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 15809; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 15810; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 15811; GFX10-CU-NEXT: s_endpgm 15812; 15813; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: 15814; SKIP-CACHE-INV: ; %bb.0: ; %entry 15815; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 15816; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 15817; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 15818; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 15819; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15820; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 15821; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 15822; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 15823; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 15824; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 15825; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 15826; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 15827; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 15828; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 15829; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 15830; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15831; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 15832; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 15833; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 15834; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15835; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, 
off, s[0:3], 0 15836; SKIP-CACHE-INV-NEXT: s_endpgm 15837; 15838; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: 15839; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15840; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15841; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15842; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15843; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15844; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15845; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15846; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15847; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15848; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15849; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 15850; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15851; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 15852; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 15853; 15854; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: 15855; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15856; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15857; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15858; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15859; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15860; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15861; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15862; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 15863; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15864; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15865; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 15866; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15867; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 15868; GFX90A-TGSPLIT-NEXT: s_endpgm 15869; 15870; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: 15871; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15872; 
GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15873; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15874; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15875; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15876; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15877; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15878; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15879; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15880; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15881; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 15882; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15883; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 15884; GFX940-NOTTGSPLIT-NEXT: s_endpgm 15885; 15886; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: 15887; GFX940-TGSPLIT: ; %bb.0: ; %entry 15888; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 15889; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15890; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15891; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15892; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15893; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15894; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 15895; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15896; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 15897; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 15898; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15899; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 15900; GFX940-TGSPLIT-NEXT: s_endpgm 15901; 15902; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: 15903; GFX11-WGP: ; %bb.0: ; %entry 15904; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 15905; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15906; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15907; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15908; 
GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15909; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 15910; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 15911; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15912; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 15913; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 15914; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 15915; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 15916; GFX11-WGP-NEXT: s_endpgm 15917; 15918; GFX11-CU-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: 15919; GFX11-CU: ; %bb.0: ; %entry 15920; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 15921; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15922; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15923; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15924; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15925; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 15926; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 15927; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15928; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 15929; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 15930; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 15931; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 15932; GFX11-CU-NEXT: s_endpgm 15933; 15934; GFX12-WGP-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: 15935; GFX12-WGP: ; %bb.0: ; %entry 15936; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 15937; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15938; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15939; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15940; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15941; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 15942; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 15943; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15944; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 15945; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 15946; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 15947; GFX12-WGP-NEXT: 
global_store_b32 v0, v1, s[0:1] 15948; GFX12-WGP-NEXT: s_endpgm 15949; 15950; GFX12-CU-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: 15951; GFX12-CU: ; %bb.0: ; %entry 15952; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 15953; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15954; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15955; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15956; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15957; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 15958; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 15959; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 15960; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 15961; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 15962; GFX12-CU-NEXT: s_wait_loadcnt 0x0 15963; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 15964; GFX12-CU-NEXT: s_endpgm 15965 ptr addrspace(1) %out, i32 %in, i32 %old) { 15966entry: 15967 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 15968 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic 15969 %val0 = extractvalue { i32, i1 } %val, 0 15970 store i32 %val0, ptr addrspace(1) %out, align 4 15971 ret void 15972} 15973 15974define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_ret_cmpxchg( 15975; GFX6-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: 15976; GFX6: ; %bb.0: ; %entry 15977; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 15978; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 15979; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 15980; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 15981; GFX6-NEXT: s_waitcnt lgkmcnt(0) 15982; GFX6-NEXT: s_mov_b32 s12, s5 15983; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 15984; GFX6-NEXT: s_mov_b32 s10, 0x100f000 15985; GFX6-NEXT: s_mov_b32 s11, -1 15986; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 15987; GFX6-NEXT: s_mov_b32 s5, s12 15988; GFX6-NEXT: s_mov_b32 s6, s11 15989; 
GFX6-NEXT: s_mov_b32 s7, s10 15990; GFX6-NEXT: v_mov_b32_e32 v0, s9 15991; GFX6-NEXT: v_mov_b32_e32 v2, s8 15992; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 15993; GFX6-NEXT: v_mov_b32_e32 v1, v2 15994; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 15995; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 15996; GFX6-NEXT: s_waitcnt vmcnt(0) 15997; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 15998; GFX6-NEXT: s_endpgm 15999; 16000; GFX7-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: 16001; GFX7: ; %bb.0: ; %entry 16002; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 16003; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16004; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 16005; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 16006; GFX7-NEXT: s_mov_b64 s[12:13], 16 16007; GFX7-NEXT: s_waitcnt lgkmcnt(0) 16008; GFX7-NEXT: s_mov_b32 s6, s4 16009; GFX7-NEXT: s_mov_b32 s7, s5 16010; GFX7-NEXT: s_mov_b32 s11, s12 16011; GFX7-NEXT: s_mov_b32 s10, s13 16012; GFX7-NEXT: s_add_u32 s6, s6, s11 16013; GFX7-NEXT: s_addc_u32 s10, s7, s10 16014; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 16015; GFX7-NEXT: s_mov_b32 s7, s10 16016; GFX7-NEXT: v_mov_b32_e32 v2, s9 16017; GFX7-NEXT: v_mov_b32_e32 v0, s8 16018; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16019; GFX7-NEXT: v_mov_b32_e32 v3, v0 16020; GFX7-NEXT: v_mov_b32_e32 v0, s6 16021; GFX7-NEXT: v_mov_b32_e32 v1, s7 16022; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 16023; GFX7-NEXT: v_mov_b32_e32 v0, s4 16024; GFX7-NEXT: v_mov_b32_e32 v1, s5 16025; GFX7-NEXT: s_waitcnt vmcnt(0) 16026; GFX7-NEXT: flat_store_dword v[0:1], v2 16027; GFX7-NEXT: s_endpgm 16028; 16029; GFX10-WGP-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: 16030; GFX10-WGP: ; %bb.0: ; %entry 16031; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 16032; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16033; GFX10-WGP-NEXT: 
s_load_dword s7, s[8:9], 0x8 16034; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 16035; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 16036; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 16037; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 16038; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16039; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 16040; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 16041; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 16042; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 16043; GFX10-WGP-NEXT: s_endpgm 16044; 16045; GFX10-CU-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: 16046; GFX10-CU: ; %bb.0: ; %entry 16047; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 16048; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16049; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 16050; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 16051; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 16052; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 16053; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 16054; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16055; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 16056; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 16057; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 16058; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 16059; GFX10-CU-NEXT: s_endpgm 16060; 16061; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: 16062; SKIP-CACHE-INV: ; %bb.0: ; %entry 16063; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 16064; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 16065; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 16066; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 16067; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 16068; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 16069; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 16070; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 16071; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 16072; 
SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 16073; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 16074; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 16075; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 16076; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 16077; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 16078; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16079; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 16080; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 16081; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 16082; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16083; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 16084; SKIP-CACHE-INV-NEXT: s_endpgm 16085; 16086; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: 16087; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16088; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16089; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16090; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16091; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16092; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16093; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16094; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16095; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16096; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16097; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 16098; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16099; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 16100; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16101; 16102; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: 16103; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16104; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16105; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16106; GFX90A-TGSPLIT-NEXT: s_load_dword s7, 
s[8:9], 0x8 16107; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16108; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16109; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16110; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16111; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16112; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16113; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 16114; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16115; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 16116; GFX90A-TGSPLIT-NEXT: s_endpgm 16117; 16118; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: 16119; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16120; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16121; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16122; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16123; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16124; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16125; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16126; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16127; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16128; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16129; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 16130; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16131; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 16132; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16133; 16134; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: 16135; GFX940-TGSPLIT: ; %bb.0: ; %entry 16136; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16137; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16138; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16139; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16140; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16141; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16142; 
GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16143; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16144; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16145; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 16146; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16147; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 16148; GFX940-TGSPLIT-NEXT: s_endpgm 16149; 16150; GFX11-WGP-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: 16151; GFX11-WGP: ; %bb.0: ; %entry 16152; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 16153; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16154; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16155; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16156; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16157; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 16158; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 16159; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16160; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 16161; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 16162; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16163; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 16164; GFX11-WGP-NEXT: s_endpgm 16165; 16166; GFX11-CU-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: 16167; GFX11-CU: ; %bb.0: ; %entry 16168; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 16169; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16170; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16171; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16172; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16173; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 16174; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 16175; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16176; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 16177; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 16178; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16179; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 16180; GFX11-CU-NEXT: s_endpgm 
16181; 16182; GFX12-WGP-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: 16183; GFX12-WGP: ; %bb.0: ; %entry 16184; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 16185; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16186; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16187; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16188; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16189; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 16190; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 16191; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16192; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 16193; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 16194; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16195; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 16196; GFX12-WGP-NEXT: s_endpgm 16197; 16198; GFX12-CU-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: 16199; GFX12-CU: ; %bb.0: ; %entry 16200; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 16201; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16202; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16203; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16204; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16205; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 16206; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 16207; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16208; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 16209; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 16210; GFX12-CU-NEXT: s_wait_loadcnt 0x0 16211; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 16212; GFX12-CU-NEXT: s_endpgm 16213 ptr addrspace(1) %out, i32 %in, i32 %old) { 16214entry: 16215 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 16216 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic 16217 %val0 = extractvalue { i32, i1 } %val, 0 16218 store i32 %val0, ptr addrspace(1) %out, align 4 16219 ret void 16220} 16221 16222define amdgpu_kernel 
void @global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg( 16223; GFX6-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: 16224; GFX6: ; %bb.0: ; %entry 16225; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 16226; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16227; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 16228; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 16229; GFX6-NEXT: s_waitcnt lgkmcnt(0) 16230; GFX6-NEXT: s_mov_b32 s12, s5 16231; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 16232; GFX6-NEXT: s_mov_b32 s10, 0x100f000 16233; GFX6-NEXT: s_mov_b32 s11, -1 16234; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 16235; GFX6-NEXT: s_mov_b32 s5, s12 16236; GFX6-NEXT: s_mov_b32 s6, s11 16237; GFX6-NEXT: s_mov_b32 s7, s10 16238; GFX6-NEXT: v_mov_b32_e32 v0, s9 16239; GFX6-NEXT: v_mov_b32_e32 v2, s8 16240; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16241; GFX6-NEXT: v_mov_b32_e32 v1, v2 16242; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 16243; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 16244; GFX6-NEXT: s_waitcnt vmcnt(0) 16245; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 16246; GFX6-NEXT: s_endpgm 16247; 16248; GFX7-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: 16249; GFX7: ; %bb.0: ; %entry 16250; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 16251; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16252; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 16253; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 16254; GFX7-NEXT: s_mov_b64 s[12:13], 16 16255; GFX7-NEXT: s_waitcnt lgkmcnt(0) 16256; GFX7-NEXT: s_mov_b32 s6, s4 16257; GFX7-NEXT: s_mov_b32 s7, s5 16258; GFX7-NEXT: s_mov_b32 s11, s12 16259; GFX7-NEXT: s_mov_b32 s10, s13 16260; GFX7-NEXT: s_add_u32 s6, s6, s11 16261; GFX7-NEXT: s_addc_u32 s10, s7, s10 16262; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 16263; GFX7-NEXT: s_mov_b32 s7, s10 16264; GFX7-NEXT: v_mov_b32_e32 v2, s9 
16265; GFX7-NEXT: v_mov_b32_e32 v0, s8 16266; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16267; GFX7-NEXT: v_mov_b32_e32 v3, v0 16268; GFX7-NEXT: v_mov_b32_e32 v0, s6 16269; GFX7-NEXT: v_mov_b32_e32 v1, s7 16270; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 16271; GFX7-NEXT: v_mov_b32_e32 v0, s4 16272; GFX7-NEXT: v_mov_b32_e32 v1, s5 16273; GFX7-NEXT: s_waitcnt vmcnt(0) 16274; GFX7-NEXT: flat_store_dword v[0:1], v2 16275; GFX7-NEXT: s_endpgm 16276; 16277; GFX10-WGP-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: 16278; GFX10-WGP: ; %bb.0: ; %entry 16279; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 16280; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16281; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 16282; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 16283; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 16284; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 16285; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 16286; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16287; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 16288; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 16289; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 16290; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 16291; GFX10-WGP-NEXT: s_endpgm 16292; 16293; GFX10-CU-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: 16294; GFX10-CU: ; %bb.0: ; %entry 16295; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 16296; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16297; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 16298; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 16299; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 16300; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 16301; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 16302; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16303; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 16304; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 16305; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 16306; 
GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 16307; GFX10-CU-NEXT: s_endpgm 16308; 16309; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: 16310; SKIP-CACHE-INV: ; %bb.0: ; %entry 16311; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 16312; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 16313; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 16314; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 16315; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 16316; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 16317; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 16318; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 16319; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 16320; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 16321; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 16322; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 16323; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 16324; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 16325; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 16326; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16327; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 16328; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 16329; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 16330; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16331; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 16332; SKIP-CACHE-INV-NEXT: s_endpgm 16333; 16334; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: 16335; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16336; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16337; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16338; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16339; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16340; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16341; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16342; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16343; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16344; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16345; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 16346; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16347; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 16348; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16349; 16350; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: 16351; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16352; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16353; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16354; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16355; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16356; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16357; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16358; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16359; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16360; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16361; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 16362; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16363; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 16364; GFX90A-TGSPLIT-NEXT: s_endpgm 16365; 16366; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: 16367; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16368; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16369; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16370; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16371; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16372; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16373; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16374; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16375; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16376; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16377; 
GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 16378; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16379; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 16380; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16381; 16382; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: 16383; GFX940-TGSPLIT: ; %bb.0: ; %entry 16384; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16385; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16386; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16387; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16388; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16389; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16390; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16391; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16392; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16393; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 16394; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16395; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 16396; GFX940-TGSPLIT-NEXT: s_endpgm 16397; 16398; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: 16399; GFX11-WGP: ; %bb.0: ; %entry 16400; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 16401; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16402; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16403; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16404; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16405; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 16406; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 16407; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16408; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 16409; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 16410; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16411; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 16412; GFX11-WGP-NEXT: s_endpgm 16413; 16414; GFX11-CU-LABEL: 
global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: 16415; GFX11-CU: ; %bb.0: ; %entry 16416; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 16417; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16418; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16419; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16420; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16421; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 16422; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 16423; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16424; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 16425; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 16426; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16427; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 16428; GFX11-CU-NEXT: s_endpgm 16429; 16430; GFX12-WGP-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: 16431; GFX12-WGP: ; %bb.0: ; %entry 16432; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 16433; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16434; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16435; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16436; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16437; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 16438; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 16439; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16440; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 16441; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 16442; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16443; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 16444; GFX12-WGP-NEXT: s_endpgm 16445; 16446; GFX12-CU-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: 16447; GFX12-CU: ; %bb.0: ; %entry 16448; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 16449; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16450; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16451; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16452; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16453; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 16454; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 
16455; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16456; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 16457; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 16458; GFX12-CU-NEXT: s_wait_loadcnt 0x0 16459; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 16460; GFX12-CU-NEXT: s_endpgm 16461 ptr addrspace(1) %out, i32 %in, i32 %old) { 16462entry: 16463 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 16464 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic 16465 %val0 = extractvalue { i32, i1 } %val, 0 16466 store i32 %val0, ptr addrspace(1) %out, align 4 16467 ret void 16468} 16469 16470define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg( 16471; GFX6-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: 16472; GFX6: ; %bb.0: ; %entry 16473; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 16474; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16475; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 16476; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 16477; GFX6-NEXT: s_waitcnt lgkmcnt(0) 16478; GFX6-NEXT: s_mov_b32 s12, s5 16479; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 16480; GFX6-NEXT: s_mov_b32 s10, 0x100f000 16481; GFX6-NEXT: s_mov_b32 s11, -1 16482; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 16483; GFX6-NEXT: s_mov_b32 s5, s12 16484; GFX6-NEXT: s_mov_b32 s6, s11 16485; GFX6-NEXT: s_mov_b32 s7, s10 16486; GFX6-NEXT: v_mov_b32_e32 v0, s9 16487; GFX6-NEXT: v_mov_b32_e32 v2, s8 16488; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16489; GFX6-NEXT: v_mov_b32_e32 v1, v2 16490; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 16491; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 16492; GFX6-NEXT: s_waitcnt vmcnt(0) 16493; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 16494; GFX6-NEXT: s_endpgm 
16495; 16496; GFX7-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: 16497; GFX7: ; %bb.0: ; %entry 16498; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 16499; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16500; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 16501; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 16502; GFX7-NEXT: s_mov_b64 s[12:13], 16 16503; GFX7-NEXT: s_waitcnt lgkmcnt(0) 16504; GFX7-NEXT: s_mov_b32 s6, s4 16505; GFX7-NEXT: s_mov_b32 s7, s5 16506; GFX7-NEXT: s_mov_b32 s11, s12 16507; GFX7-NEXT: s_mov_b32 s10, s13 16508; GFX7-NEXT: s_add_u32 s6, s6, s11 16509; GFX7-NEXT: s_addc_u32 s10, s7, s10 16510; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 16511; GFX7-NEXT: s_mov_b32 s7, s10 16512; GFX7-NEXT: v_mov_b32_e32 v2, s9 16513; GFX7-NEXT: v_mov_b32_e32 v0, s8 16514; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16515; GFX7-NEXT: v_mov_b32_e32 v3, v0 16516; GFX7-NEXT: v_mov_b32_e32 v0, s6 16517; GFX7-NEXT: v_mov_b32_e32 v1, s7 16518; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 16519; GFX7-NEXT: v_mov_b32_e32 v0, s4 16520; GFX7-NEXT: v_mov_b32_e32 v1, s5 16521; GFX7-NEXT: s_waitcnt vmcnt(0) 16522; GFX7-NEXT: flat_store_dword v[0:1], v2 16523; GFX7-NEXT: s_endpgm 16524; 16525; GFX10-WGP-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: 16526; GFX10-WGP: ; %bb.0: ; %entry 16527; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 16528; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16529; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 16530; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 16531; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 16532; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 16533; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 16534; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16535; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 16536; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 16537; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 16538; GFX10-WGP-NEXT: global_store_dword v0, v1, 
s[4:5] 16539; GFX10-WGP-NEXT: s_endpgm 16540; 16541; GFX10-CU-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: 16542; GFX10-CU: ; %bb.0: ; %entry 16543; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 16544; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16545; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 16546; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 16547; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 16548; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 16549; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 16550; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16551; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 16552; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 16553; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 16554; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 16555; GFX10-CU-NEXT: s_endpgm 16556; 16557; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: 16558; SKIP-CACHE-INV: ; %bb.0: ; %entry 16559; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 16560; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 16561; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 16562; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 16563; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 16564; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 16565; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 16566; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 16567; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 16568; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 16569; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 16570; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 16571; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 16572; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 16573; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 16574; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16575; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 16576; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 
offset:16 glc 16577; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 16578; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16579; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 16580; SKIP-CACHE-INV-NEXT: s_endpgm 16581; 16582; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: 16583; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16584; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16585; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16586; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16587; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16588; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16589; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16590; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16591; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16592; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16593; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 16594; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16595; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 16596; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16597; 16598; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: 16599; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16600; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16601; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16602; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16603; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16604; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16605; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16606; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16607; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16608; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16609; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 16610; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16611; GFX90A-TGSPLIT-NEXT: 
global_store_dword v0, v1, s[4:5] 16612; GFX90A-TGSPLIT-NEXT: s_endpgm 16613; 16614; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: 16615; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16616; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16617; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16618; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16619; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16620; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16621; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16622; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16623; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16624; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16625; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 16626; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16627; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 16628; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16629; 16630; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: 16631; GFX940-TGSPLIT: ; %bb.0: ; %entry 16632; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16633; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16634; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16635; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16636; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16637; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16638; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16639; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16640; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16641; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 16642; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16643; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 16644; GFX940-TGSPLIT-NEXT: s_endpgm 16645; 16646; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: 16647; GFX11-WGP: ; %bb.0: 
; %entry 16648; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 16649; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16650; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16651; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16652; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16653; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 16654; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 16655; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16656; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 16657; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 16658; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16659; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 16660; GFX11-WGP-NEXT: s_endpgm 16661; 16662; GFX11-CU-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: 16663; GFX11-CU: ; %bb.0: ; %entry 16664; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 16665; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16666; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16667; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16668; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16669; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 16670; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 16671; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16672; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 16673; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 16674; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16675; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 16676; GFX11-CU-NEXT: s_endpgm 16677; 16678; GFX12-WGP-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: 16679; GFX12-WGP: ; %bb.0: ; %entry 16680; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 16681; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16682; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16683; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16684; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16685; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 16686; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 16687; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 
16688; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 16689; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 16690; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16691; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 16692; GFX12-WGP-NEXT: s_endpgm 16693; 16694; GFX12-CU-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: 16695; GFX12-CU: ; %bb.0: ; %entry 16696; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 16697; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16698; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16699; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16700; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16701; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 16702; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 16703; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16704; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 16705; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 16706; GFX12-CU-NEXT: s_wait_loadcnt 0x0 16707; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 16708; GFX12-CU-NEXT: s_endpgm 16709 ptr addrspace(1) %out, i32 %in, i32 %old) { 16710entry: 16711 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 16712 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic 16713 %val0 = extractvalue { i32, i1 } %val, 0 16714 store i32 %val0, ptr addrspace(1) %out, align 4 16715 ret void 16716} 16717 16718define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_ret_cmpxchg( 16719; GFX6-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: 16720; GFX6: ; %bb.0: ; %entry 16721; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 16722; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16723; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 16724; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 16725; GFX6-NEXT: s_waitcnt lgkmcnt(0) 16726; GFX6-NEXT: s_mov_b32 s12, s5 16727; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 16728; GFX6-NEXT: s_mov_b32 s10, 
0x100f000 16729; GFX6-NEXT: s_mov_b32 s11, -1 16730; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 16731; GFX6-NEXT: s_mov_b32 s5, s12 16732; GFX6-NEXT: s_mov_b32 s6, s11 16733; GFX6-NEXT: s_mov_b32 s7, s10 16734; GFX6-NEXT: v_mov_b32_e32 v0, s9 16735; GFX6-NEXT: v_mov_b32_e32 v2, s8 16736; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16737; GFX6-NEXT: v_mov_b32_e32 v1, v2 16738; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 16739; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 16740; GFX6-NEXT: s_waitcnt vmcnt(0) 16741; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 16742; GFX6-NEXT: s_endpgm 16743; 16744; GFX7-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: 16745; GFX7: ; %bb.0: ; %entry 16746; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 16747; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16748; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 16749; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 16750; GFX7-NEXT: s_mov_b64 s[12:13], 16 16751; GFX7-NEXT: s_waitcnt lgkmcnt(0) 16752; GFX7-NEXT: s_mov_b32 s6, s4 16753; GFX7-NEXT: s_mov_b32 s7, s5 16754; GFX7-NEXT: s_mov_b32 s11, s12 16755; GFX7-NEXT: s_mov_b32 s10, s13 16756; GFX7-NEXT: s_add_u32 s6, s6, s11 16757; GFX7-NEXT: s_addc_u32 s10, s7, s10 16758; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 16759; GFX7-NEXT: s_mov_b32 s7, s10 16760; GFX7-NEXT: v_mov_b32_e32 v2, s9 16761; GFX7-NEXT: v_mov_b32_e32 v0, s8 16762; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16763; GFX7-NEXT: v_mov_b32_e32 v3, v0 16764; GFX7-NEXT: v_mov_b32_e32 v0, s6 16765; GFX7-NEXT: v_mov_b32_e32 v1, s7 16766; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 16767; GFX7-NEXT: v_mov_b32_e32 v0, s4 16768; GFX7-NEXT: v_mov_b32_e32 v1, s5 16769; GFX7-NEXT: s_waitcnt vmcnt(0) 16770; GFX7-NEXT: flat_store_dword v[0:1], v2 16771; GFX7-NEXT: s_endpgm 16772; 16773; GFX10-WGP-LABEL: 
global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: 16774; GFX10-WGP: ; %bb.0: ; %entry 16775; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 16776; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16777; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 16778; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 16779; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 16780; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 16781; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 16782; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16783; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 16784; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 16785; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 16786; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 16787; GFX10-WGP-NEXT: s_endpgm 16788; 16789; GFX10-CU-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: 16790; GFX10-CU: ; %bb.0: ; %entry 16791; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 16792; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16793; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 16794; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 16795; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 16796; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 16797; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 16798; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16799; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 16800; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 16801; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 16802; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 16803; GFX10-CU-NEXT: s_endpgm 16804; 16805; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: 16806; SKIP-CACHE-INV: ; %bb.0: ; %entry 16807; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 16808; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 16809; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 16810; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 16811; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 16812; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s8, s1 16813; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 16814; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 16815; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 16816; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 16817; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 16818; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 16819; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 16820; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 16821; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 16822; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16823; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 16824; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 16825; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 16826; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16827; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 16828; SKIP-CACHE-INV-NEXT: s_endpgm 16829; 16830; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: 16831; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16832; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16833; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16834; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16835; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16836; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16837; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16838; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16839; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16840; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16841; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 16842; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16843; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 16844; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16845; 16846; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: 
16847; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16848; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16849; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16850; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16851; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16852; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16853; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16854; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 16855; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16856; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16857; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 16858; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16859; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 16860; GFX90A-TGSPLIT-NEXT: s_endpgm 16861; 16862; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: 16863; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16864; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16865; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16866; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16867; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16868; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16869; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16870; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16871; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16872; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16873; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 16874; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16875; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 16876; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16877; 16878; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: 16879; GFX940-TGSPLIT: ; %bb.0: ; %entry 16880; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 16881; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16882; GFX940-TGSPLIT-NEXT: s_load_dword 
s3, s[4:5], 0x8 16883; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16884; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16885; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16886; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 16887; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16888; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 16889; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 16890; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16891; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 16892; GFX940-TGSPLIT-NEXT: s_endpgm 16893; 16894; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: 16895; GFX11-WGP: ; %bb.0: ; %entry 16896; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 16897; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16898; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16899; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16900; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16901; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 16902; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 16903; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16904; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 16905; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 16906; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16907; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 16908; GFX11-WGP-NEXT: s_endpgm 16909; 16910; GFX11-CU-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: 16911; GFX11-CU: ; %bb.0: ; %entry 16912; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 16913; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16914; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16915; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16916; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16917; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 16918; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 16919; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16920; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 16921; GFX11-CU-NEXT: 
global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 16922; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16923; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 16924; GFX11-CU-NEXT: s_endpgm 16925; 16926; GFX12-WGP-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: 16927; GFX12-WGP: ; %bb.0: ; %entry 16928; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 16929; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16930; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16931; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16932; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16933; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 16934; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 16935; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16936; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 16937; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 16938; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16939; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 16940; GFX12-WGP-NEXT: s_endpgm 16941; 16942; GFX12-CU-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: 16943; GFX12-CU: ; %bb.0: ; %entry 16944; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 16945; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16946; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16947; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16948; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16949; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 16950; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 16951; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 16952; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 16953; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 16954; GFX12-CU-NEXT: s_wait_loadcnt 0x0 16955; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 16956; GFX12-CU-NEXT: s_endpgm 16957 ptr addrspace(1) %out, i32 %in, i32 %old) { 16958entry: 16959 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 16960 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in 
syncscope("wavefront-one-as") monotonic acquire 16961 %val0 = extractvalue { i32, i1 } %val, 0 16962 store i32 %val0, ptr addrspace(1) %out, align 4 16963 ret void 16964}
16965; Test: volatile cmpxchg on global memory (addrspace(1)) 4 x i32 past %out, syncscope("wavefront-one-as"), success ordering acquire, failure ordering acquire; element 0 of the result is stored to %out. The check lines that follow are autogenerated by utils/update_llc_test_checks.py - regenerate them instead of editing by hand.
16966define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_ret_cmpxchg( 16967; GFX6-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: 16968; GFX6: ; %bb.0: ; %entry 16969; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 16970; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16971; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 16972; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 16973; GFX6-NEXT: s_waitcnt lgkmcnt(0) 16974; GFX6-NEXT: s_mov_b32 s12, s5 16975; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 16976; GFX6-NEXT: s_mov_b32 s10, 0x100f000 16977; GFX6-NEXT: s_mov_b32 s11, -1 16978; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 16979; GFX6-NEXT: s_mov_b32 s5, s12 16980; GFX6-NEXT: s_mov_b32 s6, s11 16981; GFX6-NEXT: s_mov_b32 s7, s10 16982; GFX6-NEXT: v_mov_b32_e32 v0, s9 16983; GFX6-NEXT: v_mov_b32_e32 v2, s8 16984; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 16985; GFX6-NEXT: v_mov_b32_e32 v1, v2 16986; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 16987; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 16988; GFX6-NEXT: s_waitcnt vmcnt(0) 16989; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 16990; GFX6-NEXT: s_endpgm 16991; 16992; GFX7-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: 16993; GFX7: ; %bb.0: ; %entry 16994; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 16995; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 16996; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 16997; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 16998; GFX7-NEXT: s_mov_b64 s[12:13], 16 16999; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17000; GFX7-NEXT: s_mov_b32 s6, s4 17001; GFX7-NEXT: s_mov_b32 s7, s5 17002; GFX7-NEXT: s_mov_b32 s11, s12 17003; GFX7-NEXT: s_mov_b32 s10, s13 17004; GFX7-NEXT: s_add_u32
s6, s6, s11 17005; GFX7-NEXT: s_addc_u32 s10, s7, s10 17006; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 17007; GFX7-NEXT: s_mov_b32 s7, s10 17008; GFX7-NEXT: v_mov_b32_e32 v2, s9 17009; GFX7-NEXT: v_mov_b32_e32 v0, s8 17010; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17011; GFX7-NEXT: v_mov_b32_e32 v3, v0 17012; GFX7-NEXT: v_mov_b32_e32 v0, s6 17013; GFX7-NEXT: v_mov_b32_e32 v1, s7 17014; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 17015; GFX7-NEXT: v_mov_b32_e32 v0, s4 17016; GFX7-NEXT: v_mov_b32_e32 v1, s5 17017; GFX7-NEXT: s_waitcnt vmcnt(0) 17018; GFX7-NEXT: flat_store_dword v[0:1], v2 17019; GFX7-NEXT: s_endpgm 17020; 17021; GFX10-WGP-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: 17022; GFX10-WGP: ; %bb.0: ; %entry 17023; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 17024; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17025; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 17026; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 17027; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17028; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 17029; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 17030; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17031; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 17032; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17033; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17034; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 17035; GFX10-WGP-NEXT: s_endpgm 17036; 17037; GFX10-CU-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: 17038; GFX10-CU: ; %bb.0: ; %entry 17039; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 17040; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17041; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 17042; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 17043; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17044; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 17045; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 17046; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def 
$vgpr1_vgpr2 killed $exec 17047; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 17048; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17049; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17050; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 17051; GFX10-CU-NEXT: s_endpgm 17052; 17053; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: 17054; SKIP-CACHE-INV: ; %bb.0: ; %entry 17055; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 17056; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 17057; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 17058; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 17059; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17060; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 17061; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 17062; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 17063; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 17064; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 17065; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 17066; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 17067; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 17068; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 17069; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 17070; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17071; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 17072; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 17073; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 17074; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17075; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 17076; SKIP-CACHE-INV-NEXT: s_endpgm 17077; 17078; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: 17079; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17080; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17081; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17082; GFX90A-NOTTGSPLIT-NEXT: s_load_dword 
s7, s[8:9], 0x8 17083; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17084; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17085; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17086; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17087; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17088; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17089; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17090; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17091; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17092; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17093; 17094; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: 17095; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17096; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17097; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17098; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17099; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17100; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17101; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17102; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17103; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17104; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17105; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17106; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17107; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17108; GFX90A-TGSPLIT-NEXT: s_endpgm 17109; 17110; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: 17111; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 17112; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17113; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17114; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17115; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17116; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17117; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17118; 
GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17119; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17120; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17121; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 17122; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17123; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17124; GFX940-NOTTGSPLIT-NEXT: s_endpgm 17125; 17126; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: 17127; GFX940-TGSPLIT: ; %bb.0: ; %entry 17128; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17129; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17130; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17131; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17132; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17133; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17134; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17135; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17136; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17137; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 17138; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17139; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17140; GFX940-TGSPLIT-NEXT: s_endpgm 17141; 17142; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: 17143; GFX11-WGP: ; %bb.0: ; %entry 17144; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 17145; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17146; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17147; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17148; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 17149; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 17150; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 17151; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17152; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 17153; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 17154; 
GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17155; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 17156; GFX11-WGP-NEXT: s_endpgm 17157; 17158; GFX11-CU-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: 17159; GFX11-CU: ; %bb.0: ; %entry 17160; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 17161; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17162; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17163; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17164; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 17165; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 17166; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 17167; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17168; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 17169; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 17170; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 17171; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 17172; GFX11-CU-NEXT: s_endpgm 17173; 17174; GFX12-WGP-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: 17175; GFX12-WGP: ; %bb.0: ; %entry 17176; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 17177; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17178; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17179; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17180; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 17181; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 17182; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 17183; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17184; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 17185; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 17186; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 17187; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 17188; GFX12-WGP-NEXT: s_endpgm 17189; 17190; GFX12-CU-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: 17191; GFX12-CU: ; %bb.0: ; %entry 17192; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 17193; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17194; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17195; GFX12-CU-NEXT: 
s_load_b32 s2, s[4:5], 0xc 17196; GFX12-CU-NEXT: s_wait_kmcnt 0x0 17197; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 17198; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 17199; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17200; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 17201; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 17202; GFX12-CU-NEXT: s_wait_loadcnt 0x0 17203; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 17204; GFX12-CU-NEXT: s_endpgm 17205 ptr addrspace(1) %out, i32 %in, i32 %old) { 17206entry: 17207 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 17208 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire 17209 %val0 = extractvalue { i32, i1 } %val, 0 17210 store i32 %val0, ptr addrspace(1) %out, align 4 17211 ret void 17212} 17213; Test: volatile cmpxchg on global memory (addrspace(1)) 4 x i32 past %out, syncscope("wavefront-one-as"), success ordering release, failure ordering acquire; element 0 of the result is stored to %out. The check lines that follow are autogenerated by utils/update_llc_test_checks.py - regenerate them instead of editing by hand. 17214define amdgpu_kernel void @global_wavefront_one_as_release_acquire_ret_cmpxchg( 17215; GFX6-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: 17216; GFX6: ; %bb.0: ; %entry 17217; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 17218; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17219; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 17220; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 17221; GFX6-NEXT: s_waitcnt lgkmcnt(0) 17222; GFX6-NEXT: s_mov_b32 s12, s5 17223; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 17224; GFX6-NEXT: s_mov_b32 s10, 0x100f000 17225; GFX6-NEXT: s_mov_b32 s11, -1 17226; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 17227; GFX6-NEXT: s_mov_b32 s5, s12 17228; GFX6-NEXT: s_mov_b32 s6, s11 17229; GFX6-NEXT: s_mov_b32 s7, s10 17230; GFX6-NEXT: v_mov_b32_e32 v0, s9 17231; GFX6-NEXT: v_mov_b32_e32 v2, s8 17232; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17233; GFX6-NEXT: v_mov_b32_e32 v1, v2 17234; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 17235; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed
$vgpr0_vgpr1 killed $exec 17236; GFX6-NEXT: s_waitcnt vmcnt(0) 17237; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 17238; GFX6-NEXT: s_endpgm 17239; 17240; GFX7-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: 17241; GFX7: ; %bb.0: ; %entry 17242; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 17243; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17244; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 17245; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 17246; GFX7-NEXT: s_mov_b64 s[12:13], 16 17247; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17248; GFX7-NEXT: s_mov_b32 s6, s4 17249; GFX7-NEXT: s_mov_b32 s7, s5 17250; GFX7-NEXT: s_mov_b32 s11, s12 17251; GFX7-NEXT: s_mov_b32 s10, s13 17252; GFX7-NEXT: s_add_u32 s6, s6, s11 17253; GFX7-NEXT: s_addc_u32 s10, s7, s10 17254; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 17255; GFX7-NEXT: s_mov_b32 s7, s10 17256; GFX7-NEXT: v_mov_b32_e32 v2, s9 17257; GFX7-NEXT: v_mov_b32_e32 v0, s8 17258; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17259; GFX7-NEXT: v_mov_b32_e32 v3, v0 17260; GFX7-NEXT: v_mov_b32_e32 v0, s6 17261; GFX7-NEXT: v_mov_b32_e32 v1, s7 17262; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 17263; GFX7-NEXT: v_mov_b32_e32 v0, s4 17264; GFX7-NEXT: v_mov_b32_e32 v1, s5 17265; GFX7-NEXT: s_waitcnt vmcnt(0) 17266; GFX7-NEXT: flat_store_dword v[0:1], v2 17267; GFX7-NEXT: s_endpgm 17268; 17269; GFX10-WGP-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: 17270; GFX10-WGP: ; %bb.0: ; %entry 17271; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 17272; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17273; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 17274; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 17275; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17276; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 17277; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 17278; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17279; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 17280; GFX10-WGP-NEXT: 
global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17281; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17282; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 17283; GFX10-WGP-NEXT: s_endpgm 17284; 17285; GFX10-CU-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: 17286; GFX10-CU: ; %bb.0: ; %entry 17287; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 17288; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17289; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 17290; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 17291; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17292; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 17293; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 17294; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17295; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 17296; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17297; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17298; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 17299; GFX10-CU-NEXT: s_endpgm 17300; 17301; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: 17302; SKIP-CACHE-INV: ; %bb.0: ; %entry 17303; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 17304; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 17305; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 17306; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 17307; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17308; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 17309; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 17310; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 17311; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 17312; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 17313; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 17314; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 17315; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 17316; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 17317; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 17318; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def 
$vgpr0_vgpr1 killed $exec 17319; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 17320; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 17321; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 17322; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17323; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 17324; SKIP-CACHE-INV-NEXT: s_endpgm 17325; 17326; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: 17327; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17328; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17329; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17330; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17331; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17332; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17333; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17334; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17335; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17336; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17337; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17338; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17339; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17340; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17341; 17342; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: 17343; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17344; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17345; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17346; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17347; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17348; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17349; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17350; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17351; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17352; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17353; GFX90A-TGSPLIT-NEXT: 
global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17354; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17355; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17356; GFX90A-TGSPLIT-NEXT: s_endpgm 17357; 17358; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: 17359; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 17360; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17361; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17362; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17363; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17364; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17365; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17366; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17367; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17368; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17369; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 17370; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17371; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17372; GFX940-NOTTGSPLIT-NEXT: s_endpgm 17373; 17374; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: 17375; GFX940-TGSPLIT: ; %bb.0: ; %entry 17376; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17377; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17378; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17379; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17380; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17381; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17382; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17383; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17384; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17385; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 17386; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17387; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17388; 
GFX940-TGSPLIT-NEXT: s_endpgm 17389; 17390; GFX11-WGP-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: 17391; GFX11-WGP: ; %bb.0: ; %entry 17392; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 17393; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17394; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17395; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17396; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 17397; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 17398; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 17399; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17400; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 17401; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 17402; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17403; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 17404; GFX11-WGP-NEXT: s_endpgm 17405; 17406; GFX11-CU-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: 17407; GFX11-CU: ; %bb.0: ; %entry 17408; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 17409; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17410; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17411; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17412; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 17413; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 17414; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 17415; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17416; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 17417; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 17418; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 17419; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 17420; GFX11-CU-NEXT: s_endpgm 17421; 17422; GFX12-WGP-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: 17423; GFX12-WGP: ; %bb.0: ; %entry 17424; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 17425; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17426; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17427; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17428; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 17429; GFX12-WGP-NEXT: v_mov_b32_e32 
v1, s3 17430; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 17431; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17432; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 17433; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 17434; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 17435; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 17436; GFX12-WGP-NEXT: s_endpgm 17437; 17438; GFX12-CU-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: 17439; GFX12-CU: ; %bb.0: ; %entry 17440; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 17441; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17442; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17443; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17444; GFX12-CU-NEXT: s_wait_kmcnt 0x0 17445; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 17446; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 17447; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17448; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 17449; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 17450; GFX12-CU-NEXT: s_wait_loadcnt 0x0 17451; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 17452; GFX12-CU-NEXT: s_endpgm 17453 ptr addrspace(1) %out, i32 %in, i32 %old) { 17454entry: 17455 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 17456 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire 17457 %val0 = extractvalue { i32, i1 } %val, 0 17458 store i32 %val0, ptr addrspace(1) %out, align 4 17459 ret void 17460} 17461 17462define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( 17463; GFX6-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: 17464; GFX6: ; %bb.0: ; %entry 17465; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 17466; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17467; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 17468; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 17469; GFX6-NEXT: s_waitcnt lgkmcnt(0) 17470; 
GFX6-NEXT: s_mov_b32 s12, s5 17471; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 17472; GFX6-NEXT: s_mov_b32 s10, 0x100f000 17473; GFX6-NEXT: s_mov_b32 s11, -1 17474; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 17475; GFX6-NEXT: s_mov_b32 s5, s12 17476; GFX6-NEXT: s_mov_b32 s6, s11 17477; GFX6-NEXT: s_mov_b32 s7, s10 17478; GFX6-NEXT: v_mov_b32_e32 v0, s9 17479; GFX6-NEXT: v_mov_b32_e32 v2, s8 17480; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17481; GFX6-NEXT: v_mov_b32_e32 v1, v2 17482; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 17483; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 17484; GFX6-NEXT: s_waitcnt vmcnt(0) 17485; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 17486; GFX6-NEXT: s_endpgm 17487; 17488; GFX7-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: 17489; GFX7: ; %bb.0: ; %entry 17490; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 17491; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17492; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 17493; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 17494; GFX7-NEXT: s_mov_b64 s[12:13], 16 17495; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17496; GFX7-NEXT: s_mov_b32 s6, s4 17497; GFX7-NEXT: s_mov_b32 s7, s5 17498; GFX7-NEXT: s_mov_b32 s11, s12 17499; GFX7-NEXT: s_mov_b32 s10, s13 17500; GFX7-NEXT: s_add_u32 s6, s6, s11 17501; GFX7-NEXT: s_addc_u32 s10, s7, s10 17502; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 17503; GFX7-NEXT: s_mov_b32 s7, s10 17504; GFX7-NEXT: v_mov_b32_e32 v2, s9 17505; GFX7-NEXT: v_mov_b32_e32 v0, s8 17506; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17507; GFX7-NEXT: v_mov_b32_e32 v3, v0 17508; GFX7-NEXT: v_mov_b32_e32 v0, s6 17509; GFX7-NEXT: v_mov_b32_e32 v1, s7 17510; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 17511; GFX7-NEXT: v_mov_b32_e32 v0, s4 17512; GFX7-NEXT: v_mov_b32_e32 v1, s5 17513; GFX7-NEXT: s_waitcnt 
vmcnt(0) 17514; GFX7-NEXT: flat_store_dword v[0:1], v2 17515; GFX7-NEXT: s_endpgm 17516; 17517; GFX10-WGP-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: 17518; GFX10-WGP: ; %bb.0: ; %entry 17519; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 17520; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17521; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 17522; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 17523; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17524; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 17525; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 17526; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17527; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 17528; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17529; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17530; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 17531; GFX10-WGP-NEXT: s_endpgm 17532; 17533; GFX10-CU-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: 17534; GFX10-CU: ; %bb.0: ; %entry 17535; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 17536; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17537; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 17538; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 17539; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17540; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 17541; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 17542; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17543; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 17544; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17545; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17546; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 17547; GFX10-CU-NEXT: s_endpgm 17548; 17549; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: 17550; SKIP-CACHE-INV: ; %bb.0: ; %entry 17551; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 17552; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 17553; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 17554; SKIP-CACHE-INV-NEXT: 
s_load_dword s4, s[2:3], 0x3 17555; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17556; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 17557; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 17558; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 17559; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 17560; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 17561; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 17562; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 17563; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 17564; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 17565; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 17566; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17567; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 17568; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 17569; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 17570; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17571; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 17572; SKIP-CACHE-INV-NEXT: s_endpgm 17573; 17574; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: 17575; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17576; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17577; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17578; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17579; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17580; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17581; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17582; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17583; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17584; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17585; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17586; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17587; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17588; 
GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17589; 17590; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: 17591; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17592; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17593; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17594; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17595; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17596; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17597; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17598; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17599; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17600; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17601; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17602; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17603; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17604; GFX90A-TGSPLIT-NEXT: s_endpgm 17605; 17606; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: 17607; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 17608; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17609; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17610; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17611; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17612; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17613; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17614; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17615; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17616; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17617; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 17618; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17619; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17620; GFX940-NOTTGSPLIT-NEXT: s_endpgm 17621; 17622; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: 17623; GFX940-TGSPLIT: ; %bb.0: ; %entry 17624; GFX940-TGSPLIT-NEXT: 
v_mov_b32_e32 v0, 0 17625; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17626; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17627; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17628; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17629; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17630; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17631; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17632; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17633; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 17634; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17635; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17636; GFX940-TGSPLIT-NEXT: s_endpgm 17637; 17638; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: 17639; GFX11-WGP: ; %bb.0: ; %entry 17640; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 17641; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17642; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17643; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17644; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 17645; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 17646; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 17647; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17648; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 17649; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 17650; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17651; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 17652; GFX11-WGP-NEXT: s_endpgm 17653; 17654; GFX11-CU-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: 17655; GFX11-CU: ; %bb.0: ; %entry 17656; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 17657; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17658; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17659; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17660; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 17661; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 17662; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 17663; GFX11-CU-NEXT: ; kill: def $vgpr1 
killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17664; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 17665; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 17666; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 17667; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 17668; GFX11-CU-NEXT: s_endpgm 17669; 17670; GFX12-WGP-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: 17671; GFX12-WGP: ; %bb.0: ; %entry 17672; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 17673; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17674; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17675; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17676; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 17677; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 17678; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 17679; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17680; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 17681; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 17682; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 17683; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 17684; GFX12-WGP-NEXT: s_endpgm 17685; 17686; GFX12-CU-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: 17687; GFX12-CU: ; %bb.0: ; %entry 17688; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 17689; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17690; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17691; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17692; GFX12-CU-NEXT: s_wait_kmcnt 0x0 17693; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 17694; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 17695; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17696; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 17697; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 17698; GFX12-CU-NEXT: s_wait_loadcnt 0x0 17699; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 17700; GFX12-CU-NEXT: s_endpgm 17701 ptr addrspace(1) %out, i32 %in, i32 %old) { 17702entry: 17703 %gep = getelementptr i32, ptr 
addrspace(1) %out, i32 4 17704 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire 17705 %val0 = extractvalue { i32, i1 } %val, 0 17706 store i32 %val0, ptr addrspace(1) %out, align 4 17707 ret void 17708} 17709 17710define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( 17711; GFX6-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: 17712; GFX6: ; %bb.0: ; %entry 17713; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 17714; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17715; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 17716; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 17717; GFX6-NEXT: s_waitcnt lgkmcnt(0) 17718; GFX6-NEXT: s_mov_b32 s12, s5 17719; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 17720; GFX6-NEXT: s_mov_b32 s10, 0x100f000 17721; GFX6-NEXT: s_mov_b32 s11, -1 17722; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 17723; GFX6-NEXT: s_mov_b32 s5, s12 17724; GFX6-NEXT: s_mov_b32 s6, s11 17725; GFX6-NEXT: s_mov_b32 s7, s10 17726; GFX6-NEXT: v_mov_b32_e32 v0, s9 17727; GFX6-NEXT: v_mov_b32_e32 v2, s8 17728; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17729; GFX6-NEXT: v_mov_b32_e32 v1, v2 17730; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 17731; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 17732; GFX6-NEXT: s_waitcnt vmcnt(0) 17733; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 17734; GFX6-NEXT: s_endpgm 17735; 17736; GFX7-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: 17737; GFX7: ; %bb.0: ; %entry 17738; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 17739; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17740; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 17741; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 17742; GFX7-NEXT: s_mov_b64 s[12:13], 16 17743; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17744; GFX7-NEXT: s_mov_b32 s6, s4 17745; GFX7-NEXT: s_mov_b32 s7, s5 17746; 
GFX7-NEXT: s_mov_b32 s11, s12 17747; GFX7-NEXT: s_mov_b32 s10, s13 17748; GFX7-NEXT: s_add_u32 s6, s6, s11 17749; GFX7-NEXT: s_addc_u32 s10, s7, s10 17750; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 17751; GFX7-NEXT: s_mov_b32 s7, s10 17752; GFX7-NEXT: v_mov_b32_e32 v2, s9 17753; GFX7-NEXT: v_mov_b32_e32 v0, s8 17754; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17755; GFX7-NEXT: v_mov_b32_e32 v3, v0 17756; GFX7-NEXT: v_mov_b32_e32 v0, s6 17757; GFX7-NEXT: v_mov_b32_e32 v1, s7 17758; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 17759; GFX7-NEXT: v_mov_b32_e32 v0, s4 17760; GFX7-NEXT: v_mov_b32_e32 v1, s5 17761; GFX7-NEXT: s_waitcnt vmcnt(0) 17762; GFX7-NEXT: flat_store_dword v[0:1], v2 17763; GFX7-NEXT: s_endpgm 17764; 17765; GFX10-WGP-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: 17766; GFX10-WGP: ; %bb.0: ; %entry 17767; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 17768; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17769; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 17770; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 17771; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17772; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 17773; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 17774; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17775; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 17776; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17777; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17778; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 17779; GFX10-WGP-NEXT: s_endpgm 17780; 17781; GFX10-CU-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: 17782; GFX10-CU: ; %bb.0: ; %entry 17783; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 17784; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17785; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 17786; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 17787; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17788; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 17789; 
GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 17790; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17791; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 17792; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 17793; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17794; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 17795; GFX10-CU-NEXT: s_endpgm 17796; 17797; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: 17798; SKIP-CACHE-INV: ; %bb.0: ; %entry 17799; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 17800; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 17801; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 17802; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 17803; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17804; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 17805; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 17806; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 17807; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 17808; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 17809; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 17810; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 17811; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 17812; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 17813; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 17814; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17815; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 17816; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 17817; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 17818; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17819; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 17820; SKIP-CACHE-INV-NEXT: s_endpgm 17821; 17822; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: 17823; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17824; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17825; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17826; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17827; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17828; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17829; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17830; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17831; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17832; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17833; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17834; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17835; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17836; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17837; 17838; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: 17839; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17840; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17841; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17842; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17843; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17844; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17845; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17846; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 17847; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17848; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17849; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 17850; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17851; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 17852; GFX90A-TGSPLIT-NEXT: s_endpgm 17853; 17854; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: 17855; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 17856; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17857; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17858; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17859; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17860; 
GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17861; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17862; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17863; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17864; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17865; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 17866; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17867; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17868; GFX940-NOTTGSPLIT-NEXT: s_endpgm 17869; 17870; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: 17871; GFX940-TGSPLIT: ; %bb.0: ; %entry 17872; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 17873; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17874; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17875; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17876; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17877; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17878; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 17879; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17880; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 17881; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 17882; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17883; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 17884; GFX940-TGSPLIT-NEXT: s_endpgm 17885; 17886; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: 17887; GFX11-WGP: ; %bb.0: ; %entry 17888; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 17889; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17890; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17891; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17892; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 17893; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 17894; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 17895; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17896; GFX11-WGP-NEXT: 
v_mov_b32_e32 v2, v3 17897; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 17898; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17899; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 17900; GFX11-WGP-NEXT: s_endpgm 17901; 17902; GFX11-CU-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: 17903; GFX11-CU: ; %bb.0: ; %entry 17904; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 17905; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17906; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17907; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17908; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 17909; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 17910; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 17911; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17912; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 17913; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 17914; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 17915; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 17916; GFX11-CU-NEXT: s_endpgm 17917; 17918; GFX12-WGP-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: 17919; GFX12-WGP: ; %bb.0: ; %entry 17920; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 17921; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17922; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17923; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17924; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 17925; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 17926; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 17927; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17928; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 17929; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 17930; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 17931; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 17932; GFX12-WGP-NEXT: s_endpgm 17933; 17934; GFX12-CU-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: 17935; GFX12-CU: ; %bb.0: ; %entry 17936; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 17937; 
GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17938; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17939; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17940; GFX12-CU-NEXT: s_wait_kmcnt 0x0 17941; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 17942; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 17943; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 17944; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 17945; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 17946; GFX12-CU-NEXT: s_wait_loadcnt 0x0 17947; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 17948; GFX12-CU-NEXT: s_endpgm 17949 ptr addrspace(1) %out, i32 %in, i32 %old) { 17950entry: 17951 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 17952 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire 17953 %val0 = extractvalue { i32, i1 } %val, 0 17954 store i32 %val0, ptr addrspace(1) %out, align 4 17955 ret void 17956} 17957 17958define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg( 17959; GFX6-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: 17960; GFX6: ; %bb.0: ; %entry 17961; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 17962; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17963; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 17964; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 17965; GFX6-NEXT: s_waitcnt lgkmcnt(0) 17966; GFX6-NEXT: s_mov_b32 s12, s5 17967; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 17968; GFX6-NEXT: s_mov_b32 s10, 0x100f000 17969; GFX6-NEXT: s_mov_b32 s11, -1 17970; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 17971; GFX6-NEXT: s_mov_b32 s5, s12 17972; GFX6-NEXT: s_mov_b32 s6, s11 17973; GFX6-NEXT: s_mov_b32 s7, s10 17974; GFX6-NEXT: v_mov_b32_e32 v0, s9 17975; GFX6-NEXT: v_mov_b32_e32 v2, s8 17976; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 17977; GFX6-NEXT: v_mov_b32_e32 v1, v2 17978; 
GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 17979; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 17980; GFX6-NEXT: s_waitcnt vmcnt(0) 17981; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 17982; GFX6-NEXT: s_endpgm 17983; 17984; GFX7-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: 17985; GFX7: ; %bb.0: ; %entry 17986; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 17987; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 17988; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 17989; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 17990; GFX7-NEXT: s_mov_b64 s[12:13], 16 17991; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17992; GFX7-NEXT: s_mov_b32 s6, s4 17993; GFX7-NEXT: s_mov_b32 s7, s5 17994; GFX7-NEXT: s_mov_b32 s11, s12 17995; GFX7-NEXT: s_mov_b32 s10, s13 17996; GFX7-NEXT: s_add_u32 s6, s6, s11 17997; GFX7-NEXT: s_addc_u32 s10, s7, s10 17998; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 17999; GFX7-NEXT: s_mov_b32 s7, s10 18000; GFX7-NEXT: v_mov_b32_e32 v2, s9 18001; GFX7-NEXT: v_mov_b32_e32 v0, s8 18002; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18003; GFX7-NEXT: v_mov_b32_e32 v3, v0 18004; GFX7-NEXT: v_mov_b32_e32 v0, s6 18005; GFX7-NEXT: v_mov_b32_e32 v1, s7 18006; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 18007; GFX7-NEXT: v_mov_b32_e32 v0, s4 18008; GFX7-NEXT: v_mov_b32_e32 v1, s5 18009; GFX7-NEXT: s_waitcnt vmcnt(0) 18010; GFX7-NEXT: flat_store_dword v[0:1], v2 18011; GFX7-NEXT: s_endpgm 18012; 18013; GFX10-WGP-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: 18014; GFX10-WGP: ; %bb.0: ; %entry 18015; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 18016; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18017; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 18018; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 18019; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 18020; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 18021; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 18022; GFX10-WGP-NEXT: ; 
kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18023; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 18024; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18025; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18026; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 18027; GFX10-WGP-NEXT: s_endpgm 18028; 18029; GFX10-CU-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: 18030; GFX10-CU: ; %bb.0: ; %entry 18031; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 18032; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18033; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 18034; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 18035; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 18036; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 18037; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 18038; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18039; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 18040; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18041; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18042; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 18043; GFX10-CU-NEXT: s_endpgm 18044; 18045; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: 18046; SKIP-CACHE-INV: ; %bb.0: ; %entry 18047; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 18048; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 18049; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 18050; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 18051; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 18052; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 18053; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 18054; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 18055; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 18056; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 18057; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 18058; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 18059; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 18060; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s5 18061; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 18062; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18063; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 18064; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 18065; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18066; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18067; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 18068; SKIP-CACHE-INV-NEXT: s_endpgm 18069; 18070; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: 18071; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 18072; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18073; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18074; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18075; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18076; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18077; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18078; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18079; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18080; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18081; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18082; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18083; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18084; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 18085; 18086; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: 18087; GFX90A-TGSPLIT: ; %bb.0: ; %entry 18088; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18089; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18090; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18091; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18092; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18093; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18094; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18095; GFX90A-TGSPLIT-NEXT: ; 
kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18096; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18097; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18098; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18099; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18100; GFX90A-TGSPLIT-NEXT: s_endpgm 18101; 18102; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: 18103; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 18104; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18105; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18106; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18107; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18108; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18109; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18110; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18111; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18112; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18113; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 18114; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18115; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18116; GFX940-NOTTGSPLIT-NEXT: s_endpgm 18117; 18118; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: 18119; GFX940-TGSPLIT: ; %bb.0: ; %entry 18120; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18121; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18122; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18123; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18124; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18125; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18126; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18127; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18128; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18129; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 
sc0 18130; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18131; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18132; GFX940-TGSPLIT-NEXT: s_endpgm 18133; 18134; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: 18135; GFX11-WGP: ; %bb.0: ; %entry 18136; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 18137; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18138; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18139; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18140; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 18141; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 18142; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 18143; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18144; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 18145; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18146; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 18147; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18148; GFX11-WGP-NEXT: s_endpgm 18149; 18150; GFX11-CU-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: 18151; GFX11-CU: ; %bb.0: ; %entry 18152; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 18153; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18154; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18155; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18156; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 18157; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 18158; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 18159; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18160; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 18161; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18162; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18163; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18164; GFX11-CU-NEXT: s_endpgm 18165; 18166; GFX12-WGP-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: 18167; GFX12-WGP: ; %bb.0: ; %entry 18168; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 18169; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18170; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 
0x8 18171; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18172; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 18173; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 18174; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 18175; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18176; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 18177; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 18178; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18179; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18180; GFX12-WGP-NEXT: s_endpgm 18181; 18182; GFX12-CU-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: 18183; GFX12-CU: ; %bb.0: ; %entry 18184; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 18185; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18186; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18187; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18188; GFX12-CU-NEXT: s_wait_kmcnt 0x0 18189; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 18190; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 18191; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18192; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 18193; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 18194; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18195; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18196; GFX12-CU-NEXT: s_endpgm 18197 ptr addrspace(1) %out, i32 %in, i32 %old) { 18198entry: 18199 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 18200 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst 18201 %val0 = extractvalue { i32, i1 } %val, 0 18202 store i32 %val0, ptr addrspace(1) %out, align 4 18203 ret void 18204} 18205 18206define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( 18207; GFX6-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: 18208; GFX6: ; %bb.0: ; %entry 18209; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 18210; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18211; 
GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 18212; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 18213; GFX6-NEXT: s_waitcnt lgkmcnt(0) 18214; GFX6-NEXT: s_mov_b32 s12, s5 18215; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 18216; GFX6-NEXT: s_mov_b32 s10, 0x100f000 18217; GFX6-NEXT: s_mov_b32 s11, -1 18218; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 18219; GFX6-NEXT: s_mov_b32 s5, s12 18220; GFX6-NEXT: s_mov_b32 s6, s11 18221; GFX6-NEXT: s_mov_b32 s7, s10 18222; GFX6-NEXT: v_mov_b32_e32 v0, s9 18223; GFX6-NEXT: v_mov_b32_e32 v2, s8 18224; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18225; GFX6-NEXT: v_mov_b32_e32 v1, v2 18226; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 18227; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18228; GFX6-NEXT: s_waitcnt vmcnt(0) 18229; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 18230; GFX6-NEXT: s_endpgm 18231; 18232; GFX7-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: 18233; GFX7: ; %bb.0: ; %entry 18234; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 18235; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18236; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 18237; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 18238; GFX7-NEXT: s_mov_b64 s[12:13], 16 18239; GFX7-NEXT: s_waitcnt lgkmcnt(0) 18240; GFX7-NEXT: s_mov_b32 s6, s4 18241; GFX7-NEXT: s_mov_b32 s7, s5 18242; GFX7-NEXT: s_mov_b32 s11, s12 18243; GFX7-NEXT: s_mov_b32 s10, s13 18244; GFX7-NEXT: s_add_u32 s6, s6, s11 18245; GFX7-NEXT: s_addc_u32 s10, s7, s10 18246; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 18247; GFX7-NEXT: s_mov_b32 s7, s10 18248; GFX7-NEXT: v_mov_b32_e32 v2, s9 18249; GFX7-NEXT: v_mov_b32_e32 v0, s8 18250; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18251; GFX7-NEXT: v_mov_b32_e32 v3, v0 18252; GFX7-NEXT: v_mov_b32_e32 v0, s6 18253; GFX7-NEXT: v_mov_b32_e32 v1, s7 18254; GFX7-NEXT: 
flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 18255; GFX7-NEXT: v_mov_b32_e32 v0, s4 18256; GFX7-NEXT: v_mov_b32_e32 v1, s5 18257; GFX7-NEXT: s_waitcnt vmcnt(0) 18258; GFX7-NEXT: flat_store_dword v[0:1], v2 18259; GFX7-NEXT: s_endpgm 18260; 18261; GFX10-WGP-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: 18262; GFX10-WGP: ; %bb.0: ; %entry 18263; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 18264; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18265; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 18266; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 18267; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 18268; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 18269; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 18270; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18271; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 18272; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18273; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18274; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 18275; GFX10-WGP-NEXT: s_endpgm 18276; 18277; GFX10-CU-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: 18278; GFX10-CU: ; %bb.0: ; %entry 18279; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 18280; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18281; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 18282; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 18283; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 18284; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 18285; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 18286; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18287; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 18288; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18289; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18290; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 18291; GFX10-CU-NEXT: s_endpgm 18292; 18293; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: 18294; SKIP-CACHE-INV: ; %bb.0: ; %entry 18295; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 
18296; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 18297; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 18298; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 18299; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 18300; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 18301; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 18302; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 18303; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 18304; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 18305; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 18306; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 18307; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 18308; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 18309; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 18310; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18311; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 18312; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 18313; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18314; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18315; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 18316; SKIP-CACHE-INV-NEXT: s_endpgm 18317; 18318; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: 18319; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 18320; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18321; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18322; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18323; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18324; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18325; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18326; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18327; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18328; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18329; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] 
offset:16 glc 18330; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18331; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18332; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 18333; 18334; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: 18335; GFX90A-TGSPLIT: ; %bb.0: ; %entry 18336; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18337; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18338; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18339; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18340; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18341; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18342; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18343; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18344; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18345; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18346; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18347; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18348; GFX90A-TGSPLIT-NEXT: s_endpgm 18349; 18350; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: 18351; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 18352; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18353; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18354; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18355; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18356; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18357; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18358; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18359; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18360; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18361; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 18362; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18363; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18364; GFX940-NOTTGSPLIT-NEXT: s_endpgm 18365; 18366; 
GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: 18367; GFX940-TGSPLIT: ; %bb.0: ; %entry 18368; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18369; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18370; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18371; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18372; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18373; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18374; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18375; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18376; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18377; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 18378; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18379; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18380; GFX940-TGSPLIT-NEXT: s_endpgm 18381; 18382; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: 18383; GFX11-WGP: ; %bb.0: ; %entry 18384; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 18385; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18386; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18387; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18388; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 18389; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 18390; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 18391; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18392; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 18393; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18394; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 18395; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18396; GFX11-WGP-NEXT: s_endpgm 18397; 18398; GFX11-CU-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: 18399; GFX11-CU: ; %bb.0: ; %entry 18400; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 18401; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18402; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18403; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18404; GFX11-CU-NEXT: 
s_waitcnt lgkmcnt(0) 18405; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 18406; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 18407; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18408; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 18409; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18410; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18411; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18412; GFX11-CU-NEXT: s_endpgm 18413; 18414; GFX12-WGP-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: 18415; GFX12-WGP: ; %bb.0: ; %entry 18416; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 18417; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18418; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18419; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18420; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 18421; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 18422; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 18423; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18424; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 18425; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 18426; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18427; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18428; GFX12-WGP-NEXT: s_endpgm 18429; 18430; GFX12-CU-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: 18431; GFX12-CU: ; %bb.0: ; %entry 18432; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 18433; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18434; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18435; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18436; GFX12-CU-NEXT: s_wait_kmcnt 0x0 18437; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 18438; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 18439; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18440; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 18441; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 18442; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18443; GFX12-CU-NEXT: global_store_b32 
v0, v1, s[0:1] 18444; GFX12-CU-NEXT: s_endpgm 18445 ptr addrspace(1) %out, i32 %in, i32 %old) { 18446entry: 18447 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 18448 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst 18449 %val0 = extractvalue { i32, i1 } %val, 0 18450 store i32 %val0, ptr addrspace(1) %out, align 4 18451 ret void 18452} 18453 18454define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_ret_cmpxchg( 18455; GFX6-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: 18456; GFX6: ; %bb.0: ; %entry 18457; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 18458; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18459; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 18460; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 18461; GFX6-NEXT: s_waitcnt lgkmcnt(0) 18462; GFX6-NEXT: s_mov_b32 s12, s5 18463; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 18464; GFX6-NEXT: s_mov_b32 s10, 0x100f000 18465; GFX6-NEXT: s_mov_b32 s11, -1 18466; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 18467; GFX6-NEXT: s_mov_b32 s5, s12 18468; GFX6-NEXT: s_mov_b32 s6, s11 18469; GFX6-NEXT: s_mov_b32 s7, s10 18470; GFX6-NEXT: v_mov_b32_e32 v0, s9 18471; GFX6-NEXT: v_mov_b32_e32 v2, s8 18472; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18473; GFX6-NEXT: v_mov_b32_e32 v1, v2 18474; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 18475; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18476; GFX6-NEXT: s_waitcnt vmcnt(0) 18477; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 18478; GFX6-NEXT: s_endpgm 18479; 18480; GFX7-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: 18481; GFX7: ; %bb.0: ; %entry 18482; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 18483; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18484; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 18485; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 18486; 
GFX7-NEXT: s_mov_b64 s[12:13], 16 18487; GFX7-NEXT: s_waitcnt lgkmcnt(0) 18488; GFX7-NEXT: s_mov_b32 s6, s4 18489; GFX7-NEXT: s_mov_b32 s7, s5 18490; GFX7-NEXT: s_mov_b32 s11, s12 18491; GFX7-NEXT: s_mov_b32 s10, s13 18492; GFX7-NEXT: s_add_u32 s6, s6, s11 18493; GFX7-NEXT: s_addc_u32 s10, s7, s10 18494; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 18495; GFX7-NEXT: s_mov_b32 s7, s10 18496; GFX7-NEXT: v_mov_b32_e32 v2, s9 18497; GFX7-NEXT: v_mov_b32_e32 v0, s8 18498; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18499; GFX7-NEXT: v_mov_b32_e32 v3, v0 18500; GFX7-NEXT: v_mov_b32_e32 v0, s6 18501; GFX7-NEXT: v_mov_b32_e32 v1, s7 18502; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 18503; GFX7-NEXT: v_mov_b32_e32 v0, s4 18504; GFX7-NEXT: v_mov_b32_e32 v1, s5 18505; GFX7-NEXT: s_waitcnt vmcnt(0) 18506; GFX7-NEXT: flat_store_dword v[0:1], v2 18507; GFX7-NEXT: s_endpgm 18508; 18509; GFX10-WGP-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: 18510; GFX10-WGP: ; %bb.0: ; %entry 18511; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 18512; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18513; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 18514; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 18515; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 18516; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 18517; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 18518; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18519; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 18520; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18521; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18522; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 18523; GFX10-WGP-NEXT: s_endpgm 18524; 18525; GFX10-CU-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: 18526; GFX10-CU: ; %bb.0: ; %entry 18527; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 18528; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18529; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 
18530; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 18531; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 18532; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 18533; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 18534; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18535; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 18536; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18537; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18538; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 18539; GFX10-CU-NEXT: s_endpgm 18540; 18541; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: 18542; SKIP-CACHE-INV: ; %bb.0: ; %entry 18543; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 18544; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 18545; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 18546; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 18547; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 18548; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 18549; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 18550; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 18551; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 18552; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 18553; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 18554; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 18555; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 18556; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 18557; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 18558; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18559; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 18560; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 18561; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18562; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18563; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 18564; SKIP-CACHE-INV-NEXT: s_endpgm 18565; 18566; GFX90A-NOTTGSPLIT-LABEL: 
global_wavefront_one_as_release_seq_cst_ret_cmpxchg: 18567; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 18568; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18569; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18570; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18571; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18572; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18573; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18574; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18575; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18576; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18577; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18578; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18579; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18580; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 18581; 18582; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: 18583; GFX90A-TGSPLIT: ; %bb.0: ; %entry 18584; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18585; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18586; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18587; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18588; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18589; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18590; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18591; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18592; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18593; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18594; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18595; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18596; GFX90A-TGSPLIT-NEXT: s_endpgm 18597; 18598; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: 18599; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 18600; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18601; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x0 18602; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18603; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18604; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18605; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18606; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18607; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18608; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18609; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 18610; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18611; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18612; GFX940-NOTTGSPLIT-NEXT: s_endpgm 18613; 18614; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: 18615; GFX940-TGSPLIT: ; %bb.0: ; %entry 18616; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18617; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18618; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18619; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18620; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18621; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18622; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18623; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18624; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18625; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 18626; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18627; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18628; GFX940-TGSPLIT-NEXT: s_endpgm 18629; 18630; GFX11-WGP-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: 18631; GFX11-WGP: ; %bb.0: ; %entry 18632; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 18633; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18634; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18635; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18636; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 18637; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 18638; GFX11-WGP-NEXT: 
v_mov_b32_e32 v3, s2 18639; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18640; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 18641; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18642; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 18643; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18644; GFX11-WGP-NEXT: s_endpgm 18645; 18646; GFX11-CU-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: 18647; GFX11-CU: ; %bb.0: ; %entry 18648; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 18649; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18650; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18651; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18652; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 18653; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 18654; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 18655; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18656; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 18657; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18658; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18659; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18660; GFX11-CU-NEXT: s_endpgm 18661; 18662; GFX12-WGP-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: 18663; GFX12-WGP: ; %bb.0: ; %entry 18664; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 18665; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18666; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18667; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18668; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 18669; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 18670; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2 18671; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18672; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 18673; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 18674; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18675; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18676; GFX12-WGP-NEXT: s_endpgm 18677; 18678; GFX12-CU-LABEL: 
global_wavefront_one_as_release_seq_cst_ret_cmpxchg: 18679; GFX12-CU: ; %bb.0: ; %entry 18680; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 18681; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18682; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18683; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18684; GFX12-CU-NEXT: s_wait_kmcnt 0x0 18685; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 18686; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2 18687; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18688; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 18689; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN 18690; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18691; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18692; GFX12-CU-NEXT: s_endpgm 18693 ptr addrspace(1) %out, i32 %in, i32 %old) { 18694entry: 18695 %gep = getelementptr i32, ptr addrspace(1) %out, i32 4 18696 %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst 18697 %val0 = extractvalue { i32, i1 } %val, 0 18698 store i32 %val0, ptr addrspace(1) %out, align 4 18699 ret void 18700} 18701 18702define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg( 18703; GFX6-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: 18704; GFX6: ; %bb.0: ; %entry 18705; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9] 18706; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18707; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2 18708; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3 18709; GFX6-NEXT: s_waitcnt lgkmcnt(0) 18710; GFX6-NEXT: s_mov_b32 s12, s5 18711; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 18712; GFX6-NEXT: s_mov_b32 s10, 0x100f000 18713; GFX6-NEXT: s_mov_b32 s11, -1 18714; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 18715; GFX6-NEXT: s_mov_b32 s5, s12 18716; GFX6-NEXT: s_mov_b32 s6, s11 18717; GFX6-NEXT: s_mov_b32 s7, s10 18718; GFX6-NEXT: v_mov_b32_e32 v0, s9 18719; GFX6-NEXT: v_mov_b32_e32 v2, s8 
18720; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18721; GFX6-NEXT: v_mov_b32_e32 v1, v2 18722; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 18723; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18724; GFX6-NEXT: s_waitcnt vmcnt(0) 18725; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 18726; GFX6-NEXT: s_endpgm 18727; 18728; GFX7-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: 18729; GFX7: ; %bb.0: ; %entry 18730; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 18731; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18732; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 18733; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 18734; GFX7-NEXT: s_mov_b64 s[12:13], 16 18735; GFX7-NEXT: s_waitcnt lgkmcnt(0) 18736; GFX7-NEXT: s_mov_b32 s6, s4 18737; GFX7-NEXT: s_mov_b32 s7, s5 18738; GFX7-NEXT: s_mov_b32 s11, s12 18739; GFX7-NEXT: s_mov_b32 s10, s13 18740; GFX7-NEXT: s_add_u32 s6, s6, s11 18741; GFX7-NEXT: s_addc_u32 s10, s7, s10 18742; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 18743; GFX7-NEXT: s_mov_b32 s7, s10 18744; GFX7-NEXT: v_mov_b32_e32 v2, s9 18745; GFX7-NEXT: v_mov_b32_e32 v0, s8 18746; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18747; GFX7-NEXT: v_mov_b32_e32 v3, v0 18748; GFX7-NEXT: v_mov_b32_e32 v0, s6 18749; GFX7-NEXT: v_mov_b32_e32 v1, s7 18750; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 18751; GFX7-NEXT: v_mov_b32_e32 v0, s4 18752; GFX7-NEXT: v_mov_b32_e32 v1, s5 18753; GFX7-NEXT: s_waitcnt vmcnt(0) 18754; GFX7-NEXT: flat_store_dword v[0:1], v2 18755; GFX7-NEXT: s_endpgm 18756; 18757; GFX10-WGP-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: 18758; GFX10-WGP: ; %bb.0: ; %entry 18759; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 18760; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18761; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8 18762; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc 18763; GFX10-WGP-NEXT: s_waitcnt 
lgkmcnt(0) 18764; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 18765; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6 18766; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18767; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3 18768; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18769; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18770; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] 18771; GFX10-WGP-NEXT: s_endpgm 18772; 18773; GFX10-CU-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: 18774; GFX10-CU: ; %bb.0: ; %entry 18775; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 18776; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18777; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8 18778; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc 18779; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 18780; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 18781; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6 18782; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18783; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3 18784; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc 18785; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18786; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] 18787; GFX10-CU-NEXT: s_endpgm 18788; 18789; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: 18790; SKIP-CACHE-INV: ; %bb.0: ; %entry 18791; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 18792; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 18793; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 18794; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 18795; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 18796; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1 18797; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 18798; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000 18799; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1 18800; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3 18801; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8 18802; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7 18803; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 18804; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5 18805; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4 18806; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 18807; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2 18808; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc 18809; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 18810; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18811; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 18812; SKIP-CACHE-INV-NEXT: s_endpgm 18813; 18814; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: 18815; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 18816; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18817; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18818; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18819; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18820; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18821; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18822; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18823; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18824; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18825; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18826; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18827; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18828; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 18829; 18830; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: 18831; GFX90A-TGSPLIT: ; %bb.0: ; %entry 18832; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18833; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18834; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18835; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18836; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18837; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18838; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6 18839; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18840; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18841; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc 18842; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18843; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] 18844; GFX90A-TGSPLIT-NEXT: s_endpgm 18845; 18846; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: 18847; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 18848; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18849; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18850; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18851; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18852; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18853; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18854; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18855; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18856; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18857; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 18858; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18859; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18860; GFX940-NOTTGSPLIT-NEXT: s_endpgm 18861; 18862; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: 18863; GFX940-TGSPLIT: ; %bb.0: ; %entry 18864; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 18865; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18866; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18867; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18868; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18869; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18870; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 18871; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18872; 
GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 18873; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 18874; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18875; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 18876; GFX940-TGSPLIT-NEXT: s_endpgm 18877; 18878; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: 18879; GFX11-WGP: ; %bb.0: ; %entry 18880; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 18881; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18882; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18883; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18884; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 18885; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 18886; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2 18887; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18888; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3 18889; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18890; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 18891; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] 18892; GFX11-WGP-NEXT: s_endpgm 18893; 18894; GFX11-CU-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: 18895; GFX11-CU: ; %bb.0: ; %entry 18896; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 18897; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18898; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18899; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18900; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 18901; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 18902; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2 18903; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 18904; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3 18905; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc 18906; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18907; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] 18908; GFX11-CU-NEXT: s_endpgm 18909; 18910; GFX12-WGP-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: 18911; GFX12-WGP: ; %bb.0: ; %entry 18912; GFX12-WGP-NEXT: 
v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr addrspace(1) %out, align 4
  ret void
}

; Value-returning cmpxchg on a global (addrspace(1)) pointer, using
; syncscope("wavefront-one-as") with seq_cst ordering on both the success and
; the failure path. The kernel compares the i32 at %out + 16 bytes against %old,
; swaps in %in on a match, and stores the value the cmpxchg read back to %out.
;
; The per-target assertions that follow the define line are autogenerated (see
; the header of this file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand. For every run configuration they expect a
; single returning compare-and-swap at byte offset 16, then a wait on the
; returned value, then the plain store of the result.
define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX6-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX6-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX6-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s12, s5
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX6-NEXT: s_mov_b32 s10, 0x100f000
; GFX6-NEXT: s_mov_b32 s11, -1
; GFX6-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: s_mov_b32 s5, s12
; GFX6-NEXT: s_mov_b32 s6, s11
; GFX6-NEXT: s_mov_b32 s7, s10
; GFX6-NEXT: v_mov_b32_e32 v0, s9
; GFX6-NEXT: v_mov_b32_e32 v2, s8
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: v_mov_b32_e32 v1, v2
; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
; GFX6-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT: s_mov_b64 s[12:13], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, s4
; GFX7-NEXT: s_mov_b32 s7, s5
; GFX7-NEXT: s_mov_b32 s11, s12
; GFX7-NEXT: s_mov_b32 s10, s13
; GFX7-NEXT: s_add_u32 s6, s6, s11
; GFX7-NEXT: s_addc_u32 s10, s7, s10
; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT: s_mov_b32 s7, s10
; GFX7-NEXT: v_mov_b32_e32 v2, s9
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s6
; GFX10-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX10-WGP-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: v_mov_b32_e32 v3, s6
; GFX10-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX10-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX10-CU-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s8, s1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, -1
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s4
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, v2
; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s6
; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5]
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2
; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1
; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX11-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX11-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX11-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3
; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v3, s2
; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
    ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
  ; Element index 4 of an i32 array, i.e. 16 bytes past %out; this is the
  ; offset of 16 that the expected compare-and-swap instructions carry.
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Operand order is (pointer, expected value, replacement value), so %old is
  ; the comparand and %in is the value written on a match.
  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst
  ; Field 0 of the { i32, i1 } result is the i32 half; the i1 half is unused.
  %val0 = extractvalue { i32, i1 } %val, 0
  ; Storing the result keeps it live, so the returning form of the atomic is
  ; what the assertions above check for.
  store i32 %val0, ptr addrspace(1) %out, align 4
  ret void
}