; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s

; Atomic i32 load with `unordered` ordering and no syncscope through a plain
; `ptr`, followed by a non-atomic store of the loaded value to %out.
; The assertions below expect a flat load with no extra modifiers on the load
; instruction for every run configuration.
define amdgpu_kernel void @flat_system_unordered_load(
; GFX7-LABEL: flat_system_unordered_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    flat_load_dword v2, v[0:1]
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: flat_system_unordered_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT:    flat_load_dword v2, v[0:1]
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    flat_store_dword v[0:1], v2
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: flat_system_unordered_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT:    flat_load_dword v2, v[0:1]
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    flat_store_dword v[0:1], v2
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: flat_system_unordered_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
; SKIP-CACHE-INV-NEXT:    flat_load_dword v2, v[0:1]
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    flat_store_dword v[0:1], v2
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: flat_system_unordered_load:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    flat_load_dword v2, v[0:1]
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    flat_store_dword v[0:1], v2
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: flat_system_unordered_load:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    flat_load_dword v2, v[0:1]
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    flat_store_dword v[0:1], v2
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: flat_system_unordered_load:
; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
; GFX940-NOTTGSPLIT-NEXT:    flat_load_dword v2, v[0:1]
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX940-TGSPLIT-LABEL: flat_system_unordered_load:
; GFX940-TGSPLIT:       ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
; GFX940-TGSPLIT-NEXT:    flat_load_dword v2, v[0:1]
; GFX940-TGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: flat_system_unordered_load:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT:    flat_load_b32 v2, v[0:1]
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT:    flat_store_b32 v[0:1], v2
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: flat_system_unordered_load:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT:    flat_load_b32 v2, v[0:1]
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT:    flat_store_b32 v[0:1], v2
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: flat_system_unordered_load:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT:    flat_load_b32 v2, v[0:1]
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: flat_system_unordered_load:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT:    flat_load_b32 v2, v[0:1]
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT:    s_endpgm
    ptr %in, ptr %out) {
entry:
  %val = load atomic i32, ptr %in unordered, align 4
  store i32 %val, ptr %out
  ret void
}

; Atomic i32 load with `monotonic` ordering and no syncscope through a plain
; `ptr`, followed by a non-atomic store of the loaded value to %out.
; Compared with the unordered case, the assertions expect a target-specific
; modifier on the load instruction (glc, glc dlc, sc0 sc1 or scope:SCOPE_SYS).
define amdgpu_kernel void @flat_system_monotonic_load(
; GFX7-LABEL: flat_system_monotonic_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: flat_system_monotonic_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT:    flat_load_dword v2, v[0:1] glc dlc
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    flat_store_dword v[0:1], v2
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: flat_system_monotonic_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT:    flat_load_dword v2, v[0:1] glc dlc
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    flat_store_dword v[0:1], v2
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: flat_system_monotonic_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
; SKIP-CACHE-INV-NEXT:    flat_load_dword v2, v[0:1] glc
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    flat_store_dword v[0:1], v2
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: flat_system_monotonic_load:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    flat_load_dword v2, v[0:1] glc
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    flat_store_dword v[0:1], v2
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: flat_system_monotonic_load:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    flat_load_dword v2, v[0:1] glc
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    flat_store_dword v[0:1], v2
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_load:
; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
; GFX940-NOTTGSPLIT-NEXT:    flat_load_dword v2, v[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX940-TGSPLIT-LABEL: flat_system_monotonic_load:
; GFX940-TGSPLIT:       ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
; GFX940-TGSPLIT-NEXT:    flat_load_dword v2, v[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: flat_system_monotonic_load:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT:    flat_load_b32 v2, v[0:1] glc
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT:    flat_store_b32 v[0:1], v2
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: flat_system_monotonic_load:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT:    flat_load_b32 v2, v[0:1] glc
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT:    flat_store_b32 v[0:1], v2
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: flat_system_monotonic_load:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT:    flat_load_b32 v2, v[0:1] scope:SCOPE_SYS
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: flat_system_monotonic_load:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT:    flat_load_b32 v2, v[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT:    s_endpgm
    ptr %in, ptr %out) {
entry:
  %val = load atomic i32, ptr %in monotonic, align 4
  store i32 %val, ptr %out
  ret void
}

; Atomic i32 load with `acquire` ordering and no syncscope through a plain
; `ptr`, followed by a non-atomic store of the loaded value to %out.
; The assertions expect the load to be followed by a wait and then by
; cache-invalidate instructions before the store; the run that passes
; -amdgcn-skip-cache-invalidations expects the wait but no invalidate.
define amdgpu_kernel void @flat_system_acquire_load(
; GFX7-LABEL: flat_system_acquire_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: flat_system_acquire_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT:    flat_load_dword v2, v[0:1] glc dlc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT:    flat_store_dword v[0:1], v2
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: flat_system_acquire_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT:    flat_load_dword v2, v[0:1] glc dlc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT:    flat_store_dword v[0:1], v2
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: flat_system_acquire_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
; SKIP-CACHE-INV-NEXT:    flat_load_dword v2, v[0:1] glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    flat_store_dword v[0:1], v2
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: flat_system_acquire_load:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    flat_load_dword v2, v[0:1] glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    flat_store_dword v[0:1], v2
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: flat_system_acquire_load:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    flat_load_dword v2, v[0:1] glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_invl2
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    flat_store_dword v[0:1], v2
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_load:
; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
; GFX940-NOTTGSPLIT-NEXT:    flat_load_dword v2, v[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT:    flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX940-TGSPLIT-LABEL: flat_system_acquire_load:
; GFX940-TGSPLIT:       ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
; GFX940-TGSPLIT-NEXT:    flat_load_dword v2, v[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT:    flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: flat_system_acquire_load:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT:    flat_load_b32 v2, v[0:1] glc
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT:    buffer_gl1_inv
; GFX11-WGP-NEXT:    buffer_gl0_inv
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT:    flat_store_b32 v[0:1], v2
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: flat_system_acquire_load:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT:    flat_load_b32 v2, v[0:1] glc
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT:    buffer_gl1_inv
; GFX11-CU-NEXT:    buffer_gl0_inv
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT:    flat_store_b32 v[0:1], v2
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: flat_system_acquire_load:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT:    flat_load_b32 v2, v[0:1] scope:SCOPE_SYS
; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: flat_system_acquire_load:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT:    flat_load_b32 v2, v[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT:    s_endpgm
    ptr %in, ptr %out) {
entry:
  %val = load atomic i32, ptr %in acquire, align 4
  store i32 %val, ptr %out
  ret void
}

; Atomic i32 load with `seq_cst` ordering and no syncscope through a plain
; `ptr`, followed by a non-atomic store of the loaded value to %out.
; The assertions expect wait instructions both before and after the load, and
; cache-invalidate instructions after it except in the run that passes
; -amdgcn-skip-cache-invalidations.
define amdgpu_kernel void @flat_system_seq_cst_load(
; GFX7-LABEL: flat_system_seq_cst_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: flat_system_seq_cst_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    flat_load_dword v2, v[0:1] glc dlc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT:    flat_store_dword v[0:1], v2
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: flat_system_seq_cst_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    flat_load_dword v2, v[0:1] glc dlc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT:    flat_store_dword v[0:1], v2
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    flat_load_dword v2, v[0:1] glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    flat_store_dword v[0:1], v2
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: flat_system_seq_cst_load:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    flat_load_dword v2, v[0:1] glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    flat_store_dword v[0:1], v2
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: flat_system_seq_cst_load:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    flat_load_dword v2, v[0:1] glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_invl2
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    flat_store_dword v[0:1], v2
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_load:
; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    flat_load_dword v2, v[0:1] sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT:    flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_load:
; GFX940-TGSPLIT:       ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    flat_load_dword v2, v[0:1] sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT:    flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: flat_system_seq_cst_load:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT:    flat_load_b32 v2, v[0:1] glc
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT:    buffer_gl1_inv
; GFX11-WGP-NEXT:    buffer_gl0_inv
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT:    flat_store_b32 v[0:1], v2
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: flat_system_seq_cst_load:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT:    flat_load_b32 v2, v[0:1] glc
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT:    buffer_gl1_inv
; GFX11-CU-NEXT:    buffer_gl0_inv
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT:    flat_store_b32 v[0:1], v2
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: flat_system_seq_cst_load:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT:    flat_load_b32 v2, v[0:1] scope:SCOPE_SYS
; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: flat_system_seq_cst_load:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
; GFX12-CU-NEXT:    s_wait_storecnt 0x0
; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT:    flat_load_b32 v2, v[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT:    s_endpgm
    ptr %in, ptr %out) {
entry:
  %val = load atomic i32, ptr %in seq_cst, align 4
  store i32 %val, ptr %out
  ret void
}

define amdgpu_kernel void @flat_system_unordered_store(
; GFX7-LABEL: flat_system_unordered_store:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    v_mov_b32_e32 v2, s4
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: flat_system_unordered_store:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x8
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1,
s7 762; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 763; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 764; GFX10-WGP-NEXT: s_endpgm 765; 766; GFX10-CU-LABEL: flat_system_unordered_store: 767; GFX10-CU: ; %bb.0: ; %entry 768; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0 769; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 770; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 771; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 772; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 773; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 774; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 775; GFX10-CU-NEXT: s_endpgm 776; 777; SKIP-CACHE-INV-LABEL: flat_system_unordered_store: 778; SKIP-CACHE-INV: ; %bb.0: ; %entry 779; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0 780; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 781; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 782; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 783; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 784; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 785; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 786; SKIP-CACHE-INV-NEXT: s_endpgm 787; 788; GFX90A-NOTTGSPLIT-LABEL: flat_system_unordered_store: 789; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 790; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 791; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 792; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 793; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 794; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 795; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 796; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 797; 798; GFX90A-TGSPLIT-LABEL: flat_system_unordered_store: 799; GFX90A-TGSPLIT: ; %bb.0: ; %entry 800; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 801; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 802; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 803; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 804; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 805; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 806; 
GFX90A-TGSPLIT-NEXT: s_endpgm 807; 808; GFX940-NOTTGSPLIT-LABEL: flat_system_unordered_store: 809; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 810; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 811; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 812; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 813; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 814; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 815; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 816; GFX940-NOTTGSPLIT-NEXT: s_endpgm 817; 818; GFX940-TGSPLIT-LABEL: flat_system_unordered_store: 819; GFX940-TGSPLIT: ; %bb.0: ; %entry 820; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 821; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 822; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 823; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 824; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 825; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 826; GFX940-TGSPLIT-NEXT: s_endpgm 827; 828; GFX11-WGP-LABEL: flat_system_unordered_store: 829; GFX11-WGP: ; %bb.0: ; %entry 830; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 831; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 832; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 833; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 834; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 835; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 836; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 837; GFX11-WGP-NEXT: s_endpgm 838; 839; GFX11-CU-LABEL: flat_system_unordered_store: 840; GFX11-CU: ; %bb.0: ; %entry 841; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 842; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 843; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 844; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 845; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 846; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 847; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 848; GFX11-CU-NEXT: s_endpgm 849; 850; GFX12-WGP-LABEL: flat_system_unordered_store: 851; GFX12-WGP: ; %bb.0: ; %entry 852; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 853; GFX12-WGP-NEXT: 
s_load_b64 s[2:3], s[4:5], 0x8 854; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 855; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 856; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 857; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 858; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 859; GFX12-WGP-NEXT: s_endpgm 860; 861; GFX12-CU-LABEL: flat_system_unordered_store: 862; GFX12-CU: ; %bb.0: ; %entry 863; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 864; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 865; GFX12-CU-NEXT: s_wait_kmcnt 0x0 866; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 867; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 868; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 869; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 870; GFX12-CU-NEXT: s_endpgm 871 i32 %in, ptr %out) { 872entry: 873 store atomic i32 %in, ptr %out unordered, align 4 874 ret void 875} 876 877define amdgpu_kernel void @flat_system_monotonic_store( 878; GFX7-LABEL: flat_system_monotonic_store: 879; GFX7: ; %bb.0: ; %entry 880; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 881; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 882; GFX7-NEXT: s_waitcnt lgkmcnt(0) 883; GFX7-NEXT: v_mov_b32_e32 v0, s6 884; GFX7-NEXT: v_mov_b32_e32 v1, s7 885; GFX7-NEXT: v_mov_b32_e32 v2, s4 886; GFX7-NEXT: flat_store_dword v[0:1], v2 887; GFX7-NEXT: s_endpgm 888; 889; GFX10-WGP-LABEL: flat_system_monotonic_store: 890; GFX10-WGP: ; %bb.0: ; %entry 891; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0 892; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 893; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 894; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 895; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 896; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 897; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 898; GFX10-WGP-NEXT: s_endpgm 899; 900; GFX10-CU-LABEL: flat_system_monotonic_store: 901; GFX10-CU: ; %bb.0: ; %entry 902; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0 903; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 904; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 905; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 906; GFX10-CU-NEXT: 
v_mov_b32_e32 v1, s7 907; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 908; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 909; GFX10-CU-NEXT: s_endpgm 910; 911; SKIP-CACHE-INV-LABEL: flat_system_monotonic_store: 912; SKIP-CACHE-INV: ; %bb.0: ; %entry 913; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0 914; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 915; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 916; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 917; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 918; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 919; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 920; SKIP-CACHE-INV-NEXT: s_endpgm 921; 922; GFX90A-NOTTGSPLIT-LABEL: flat_system_monotonic_store: 923; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 924; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 925; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 926; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 927; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 928; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 929; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 930; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 931; 932; GFX90A-TGSPLIT-LABEL: flat_system_monotonic_store: 933; GFX90A-TGSPLIT: ; %bb.0: ; %entry 934; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 935; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 936; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 937; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 938; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 939; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 940; GFX90A-TGSPLIT-NEXT: s_endpgm 941; 942; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_store: 943; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 944; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 945; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 946; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 947; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 948; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 949; 
GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 950; GFX940-NOTTGSPLIT-NEXT: s_endpgm 951; 952; GFX940-TGSPLIT-LABEL: flat_system_monotonic_store: 953; GFX940-TGSPLIT: ; %bb.0: ; %entry 954; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 955; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 956; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 957; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 958; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 959; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 960; GFX940-TGSPLIT-NEXT: s_endpgm 961; 962; GFX11-WGP-LABEL: flat_system_monotonic_store: 963; GFX11-WGP: ; %bb.0: ; %entry 964; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 965; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 966; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 967; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 968; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 969; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 970; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 971; GFX11-WGP-NEXT: s_endpgm 972; 973; GFX11-CU-LABEL: flat_system_monotonic_store: 974; GFX11-CU: ; %bb.0: ; %entry 975; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 976; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 977; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 978; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 979; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 980; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 981; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 982; GFX11-CU-NEXT: s_endpgm 983; 984; GFX12-WGP-LABEL: flat_system_monotonic_store: 985; GFX12-WGP: ; %bb.0: ; %entry 986; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 987; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 988; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 989; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 990; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 991; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 992; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS 993; GFX12-WGP-NEXT: s_endpgm 994; 995; GFX12-CU-LABEL: flat_system_monotonic_store: 996; GFX12-CU: ; %bb.0: ; %entry 997; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 
0x0 998; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 999; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1000; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 1001; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 1002; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 1003; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS 1004; GFX12-CU-NEXT: s_endpgm 1005 i32 %in, ptr %out) { 1006entry: 1007 store atomic i32 %in, ptr %out monotonic, align 4 1008 ret void 1009} 1010 1011define amdgpu_kernel void @flat_system_release_store( 1012; GFX7-LABEL: flat_system_release_store: 1013; GFX7: ; %bb.0: ; %entry 1014; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 1015; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 1016; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1017; GFX7-NEXT: v_mov_b32_e32 v0, s6 1018; GFX7-NEXT: v_mov_b32_e32 v1, s7 1019; GFX7-NEXT: v_mov_b32_e32 v2, s4 1020; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1021; GFX7-NEXT: flat_store_dword v[0:1], v2 1022; GFX7-NEXT: s_endpgm 1023; 1024; GFX10-WGP-LABEL: flat_system_release_store: 1025; GFX10-WGP: ; %bb.0: ; %entry 1026; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0 1027; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 1028; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1029; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 1030; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 1031; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 1032; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1033; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1034; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 1035; GFX10-WGP-NEXT: s_endpgm 1036; 1037; GFX10-CU-LABEL: flat_system_release_store: 1038; GFX10-CU: ; %bb.0: ; %entry 1039; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0 1040; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 1041; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1042; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 1043; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 1044; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 1045; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1046; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1047; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 1048; 
GFX10-CU-NEXT: s_endpgm 1049; 1050; SKIP-CACHE-INV-LABEL: flat_system_release_store: 1051; SKIP-CACHE-INV: ; %bb.0: ; %entry 1052; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0 1053; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1054; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1055; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1056; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 1057; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 1058; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1059; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 1060; SKIP-CACHE-INV-NEXT: s_endpgm 1061; 1062; GFX90A-NOTTGSPLIT-LABEL: flat_system_release_store: 1063; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1064; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 1065; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 1066; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1067; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 1068; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 1069; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 1070; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1071; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 1072; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1073; 1074; GFX90A-TGSPLIT-LABEL: flat_system_release_store: 1075; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1076; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 1077; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 1078; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1079; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 1080; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 1081; GFX90A-TGSPLIT-NEXT: buffer_wbl2 1082; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1083; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 1084; GFX90A-TGSPLIT-NEXT: s_endpgm 1085; 1086; GFX940-NOTTGSPLIT-LABEL: flat_system_release_store: 1087; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1088; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 1089; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 
0x8 1090; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1091; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 1092; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 1093; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1094; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1095; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 1096; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1097; 1098; GFX940-TGSPLIT-LABEL: flat_system_release_store: 1099; GFX940-TGSPLIT: ; %bb.0: ; %entry 1100; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 1101; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 1102; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1103; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 1104; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 1105; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1106; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1107; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 1108; GFX940-TGSPLIT-NEXT: s_endpgm 1109; 1110; GFX11-WGP-LABEL: flat_system_release_store: 1111; GFX11-WGP: ; %bb.0: ; %entry 1112; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 1113; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 1114; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1115; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 1116; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 1117; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 1118; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1119; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1120; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 1121; GFX11-WGP-NEXT: s_endpgm 1122; 1123; GFX11-CU-LABEL: flat_system_release_store: 1124; GFX11-CU: ; %bb.0: ; %entry 1125; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 1126; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 1127; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1128; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 1129; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 1130; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 1131; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1132; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1133; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 
1134; GFX11-CU-NEXT: s_endpgm 1135; 1136; GFX12-WGP-LABEL: flat_system_release_store: 1137; GFX12-WGP: ; %bb.0: ; %entry 1138; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 1139; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 1140; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1141; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 1142; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 1143; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 1144; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 1145; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 1146; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 1147; GFX12-WGP-NEXT: s_wait_storecnt 0x0 1148; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 1149; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS 1150; GFX12-WGP-NEXT: s_endpgm 1151; 1152; GFX12-CU-LABEL: flat_system_release_store: 1153; GFX12-CU: ; %bb.0: ; %entry 1154; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 1155; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 1156; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1157; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 1158; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 1159; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 1160; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 1161; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 1162; GFX12-CU-NEXT: s_wait_samplecnt 0x0 1163; GFX12-CU-NEXT: s_wait_storecnt 0x0 1164; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 1165; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS 1166; GFX12-CU-NEXT: s_endpgm 1167 i32 %in, ptr %out) { 1168entry: 1169 store atomic i32 %in, ptr %out release, align 4 1170 ret void 1171} 1172 1173define amdgpu_kernel void @flat_system_seq_cst_store( 1174; GFX7-LABEL: flat_system_seq_cst_store: 1175; GFX7: ; %bb.0: ; %entry 1176; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 1177; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 1178; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1179; GFX7-NEXT: v_mov_b32_e32 v0, s6 1180; GFX7-NEXT: v_mov_b32_e32 v1, s7 1181; GFX7-NEXT: v_mov_b32_e32 v2, s4 1182; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1183; GFX7-NEXT: flat_store_dword v[0:1], v2 1184; GFX7-NEXT: s_endpgm 1185; 1186; 
GFX10-WGP-LABEL: flat_system_seq_cst_store: 1187; GFX10-WGP: ; %bb.0: ; %entry 1188; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0 1189; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 1190; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1191; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 1192; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 1193; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 1194; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1195; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1196; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 1197; GFX10-WGP-NEXT: s_endpgm 1198; 1199; GFX10-CU-LABEL: flat_system_seq_cst_store: 1200; GFX10-CU: ; %bb.0: ; %entry 1201; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0 1202; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 1203; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1204; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 1205; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 1206; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 1207; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1208; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1209; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 1210; GFX10-CU-NEXT: s_endpgm 1211; 1212; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_store: 1213; SKIP-CACHE-INV: ; %bb.0: ; %entry 1214; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0 1215; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1216; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1217; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1218; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 1219; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 1220; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1221; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 1222; SKIP-CACHE-INV-NEXT: s_endpgm 1223; 1224; GFX90A-NOTTGSPLIT-LABEL: flat_system_seq_cst_store: 1225; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1226; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 1227; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 1228; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1229; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 
1230; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 1231; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 1232; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1233; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 1234; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1235; 1236; GFX90A-TGSPLIT-LABEL: flat_system_seq_cst_store: 1237; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1238; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 1239; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 1240; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1241; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 1242; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 1243; GFX90A-TGSPLIT-NEXT: buffer_wbl2 1244; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1245; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 1246; GFX90A-TGSPLIT-NEXT: s_endpgm 1247; 1248; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_store: 1249; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1250; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 1251; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 1252; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1253; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 1254; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 1255; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1256; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1257; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 1258; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1259; 1260; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_store: 1261; GFX940-TGSPLIT: ; %bb.0: ; %entry 1262; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 1263; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 1264; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1265; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 1266; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 1267; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1268; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1269; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 1270; GFX940-TGSPLIT-NEXT: 
s_endpgm 1271; 1272; GFX11-WGP-LABEL: flat_system_seq_cst_store: 1273; GFX11-WGP: ; %bb.0: ; %entry 1274; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 1275; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 1276; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1277; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 1278; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 1279; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 1280; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1281; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1282; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 1283; GFX11-WGP-NEXT: s_endpgm 1284; 1285; GFX11-CU-LABEL: flat_system_seq_cst_store: 1286; GFX11-CU: ; %bb.0: ; %entry 1287; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 1288; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 1289; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1290; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 1291; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 1292; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 1293; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1294; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1295; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 1296; GFX11-CU-NEXT: s_endpgm 1297; 1298; GFX12-WGP-LABEL: flat_system_seq_cst_store: 1299; GFX12-WGP: ; %bb.0: ; %entry 1300; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 1301; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 1302; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1303; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 1304; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 1305; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 1306; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 1307; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 1308; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 1309; GFX12-WGP-NEXT: s_wait_storecnt 0x0 1310; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 1311; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS 1312; GFX12-WGP-NEXT: s_endpgm 1313; 1314; GFX12-CU-LABEL: flat_system_seq_cst_store: 1315; GFX12-CU: ; %bb.0: ; %entry 1316; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 1317; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 1318; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1319; 
GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 1320; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 1321; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 1322; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 1323; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 1324; GFX12-CU-NEXT: s_wait_samplecnt 0x0 1325; GFX12-CU-NEXT: s_wait_storecnt 0x0 1326; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 1327; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS 1328; GFX12-CU-NEXT: s_endpgm 1329 i32 %in, ptr %out) { 1330entry: 1331 store atomic i32 %in, ptr %out seq_cst, align 4 1332 ret void 1333} 1334 1335define amdgpu_kernel void @flat_system_monotonic_atomicrmw( 1336; GFX7-LABEL: flat_system_monotonic_atomicrmw: 1337; GFX7: ; %bb.0: ; %entry 1338; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1339; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1340; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1341; GFX7-NEXT: v_mov_b32_e32 v0, s6 1342; GFX7-NEXT: v_mov_b32_e32 v1, s7 1343; GFX7-NEXT: v_mov_b32_e32 v2, s4 1344; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1345; GFX7-NEXT: s_endpgm 1346; 1347; GFX10-WGP-LABEL: flat_system_monotonic_atomicrmw: 1348; GFX10-WGP: ; %bb.0: ; %entry 1349; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1350; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x8 1351; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1352; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 1353; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 1354; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 1355; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 1356; GFX10-WGP-NEXT: s_endpgm 1357; 1358; GFX10-CU-LABEL: flat_system_monotonic_atomicrmw: 1359; GFX10-CU: ; %bb.0: ; %entry 1360; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1361; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x8 1362; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1363; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 1364; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 1365; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 1366; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 1367; GFX10-CU-NEXT: s_endpgm 1368; 1369; SKIP-CACHE-INV-LABEL: flat_system_monotonic_atomicrmw: 1370; 
SKIP-CACHE-INV: ; %bb.0: ; %entry 1371; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1372; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x2 1373; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1374; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1375; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 1376; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 1377; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 1378; SKIP-CACHE-INV-NEXT: s_endpgm 1379; 1380; GFX90A-NOTTGSPLIT-LABEL: flat_system_monotonic_atomicrmw: 1381; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1382; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1383; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 1384; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1385; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 1386; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 1387; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 1388; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1389; 1390; GFX90A-TGSPLIT-LABEL: flat_system_monotonic_atomicrmw: 1391; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1392; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1393; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 1394; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1395; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 1396; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 1397; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 1398; GFX90A-TGSPLIT-NEXT: s_endpgm 1399; 1400; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_atomicrmw: 1401; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1402; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1403; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 1404; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1405; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 1406; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 1407; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 1408; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1409; 1410; GFX940-TGSPLIT-LABEL: 
flat_system_monotonic_atomicrmw: 1411; GFX940-TGSPLIT: ; %bb.0: ; %entry 1412; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1413; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 1414; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1415; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 1416; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 1417; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 1418; GFX940-TGSPLIT-NEXT: s_endpgm 1419; 1420; GFX11-WGP-LABEL: flat_system_monotonic_atomicrmw: 1421; GFX11-WGP: ; %bb.0: ; %entry 1422; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1423; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 1424; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1425; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 1426; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 1427; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 1428; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 1429; GFX11-WGP-NEXT: s_endpgm 1430; 1431; GFX11-CU-LABEL: flat_system_monotonic_atomicrmw: 1432; GFX11-CU: ; %bb.0: ; %entry 1433; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1434; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 1435; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1436; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 1437; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 1438; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 1439; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 1440; GFX11-CU-NEXT: s_endpgm 1441; 1442; GFX12-WGP-LABEL: flat_system_monotonic_atomicrmw: 1443; GFX12-WGP: ; %bb.0: ; %entry 1444; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1445; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 1446; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1447; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 1448; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 1449; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 1450; GFX12-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 1451; GFX12-WGP-NEXT: s_endpgm 1452; 1453; GFX12-CU-LABEL: flat_system_monotonic_atomicrmw: 1454; GFX12-CU: ; %bb.0: ; %entry 1455; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1456; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 
0x8 1457; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1458; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 1459; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 1460; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 1461; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 1462; GFX12-CU-NEXT: s_endpgm 1463 ptr %out, i32 %in) { 1464entry: 1465 %val = atomicrmw volatile xchg ptr %out, i32 %in monotonic 1466 ret void 1467} 1468 1469define amdgpu_kernel void @flat_system_acquire_atomicrmw( 1470; GFX7-LABEL: flat_system_acquire_atomicrmw: 1471; GFX7: ; %bb.0: ; %entry 1472; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1473; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1474; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1475; GFX7-NEXT: v_mov_b32_e32 v0, s6 1476; GFX7-NEXT: v_mov_b32_e32 v1, s7 1477; GFX7-NEXT: v_mov_b32_e32 v2, s4 1478; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1479; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1480; GFX7-NEXT: buffer_wbinvl1_vol 1481; GFX7-NEXT: s_endpgm 1482; 1483; GFX10-WGP-LABEL: flat_system_acquire_atomicrmw: 1484; GFX10-WGP: ; %bb.0: ; %entry 1485; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1486; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x8 1487; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1488; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 1489; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 1490; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 1491; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 1492; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1493; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1494; GFX10-WGP-NEXT: buffer_gl1_inv 1495; GFX10-WGP-NEXT: buffer_gl0_inv 1496; GFX10-WGP-NEXT: s_endpgm 1497; 1498; GFX10-CU-LABEL: flat_system_acquire_atomicrmw: 1499; GFX10-CU: ; %bb.0: ; %entry 1500; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1501; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x8 1502; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1503; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 1504; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 1505; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 1506; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 1507; GFX10-CU-NEXT: 
s_waitcnt lgkmcnt(0) 1508; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1509; GFX10-CU-NEXT: buffer_gl1_inv 1510; GFX10-CU-NEXT: buffer_gl0_inv 1511; GFX10-CU-NEXT: s_endpgm 1512; 1513; SKIP-CACHE-INV-LABEL: flat_system_acquire_atomicrmw: 1514; SKIP-CACHE-INV: ; %bb.0: ; %entry 1515; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1516; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x2 1517; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1518; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1519; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 1520; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 1521; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 1522; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1523; SKIP-CACHE-INV-NEXT: s_endpgm 1524; 1525; GFX90A-NOTTGSPLIT-LABEL: flat_system_acquire_atomicrmw: 1526; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1527; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1528; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 1529; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1530; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 1531; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 1532; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 1533; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1534; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 1535; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 1536; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1537; 1538; GFX90A-TGSPLIT-LABEL: flat_system_acquire_atomicrmw: 1539; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1540; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1541; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 1542; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1543; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 1544; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 1545; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 1546; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1547; GFX90A-TGSPLIT-NEXT: buffer_invl2 1548; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 1549; 
GFX90A-TGSPLIT-NEXT: s_endpgm 1550; 1551; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_atomicrmw: 1552; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1553; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1554; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 1555; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1556; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 1557; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 1558; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 1559; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1560; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 1561; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1562; 1563; GFX940-TGSPLIT-LABEL: flat_system_acquire_atomicrmw: 1564; GFX940-TGSPLIT: ; %bb.0: ; %entry 1565; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1566; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 1567; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1568; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 1569; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 1570; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 1571; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1572; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 1573; GFX940-TGSPLIT-NEXT: s_endpgm 1574; 1575; GFX11-WGP-LABEL: flat_system_acquire_atomicrmw: 1576; GFX11-WGP: ; %bb.0: ; %entry 1577; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1578; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 1579; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1580; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 1581; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 1582; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 1583; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 1584; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1585; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1586; GFX11-WGP-NEXT: buffer_gl1_inv 1587; GFX11-WGP-NEXT: buffer_gl0_inv 1588; GFX11-WGP-NEXT: s_endpgm 1589; 1590; GFX11-CU-LABEL: flat_system_acquire_atomicrmw: 1591; GFX11-CU: ; %bb.0: ; %entry 1592; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1593; GFX11-CU-NEXT: s_load_b32 s0, 
s[4:5], 0x8 1594; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1595; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 1596; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 1597; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 1598; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 1599; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1600; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1601; GFX11-CU-NEXT: buffer_gl1_inv 1602; GFX11-CU-NEXT: buffer_gl0_inv 1603; GFX11-CU-NEXT: s_endpgm 1604; 1605; GFX12-WGP-LABEL: flat_system_acquire_atomicrmw: 1606; GFX12-WGP: ; %bb.0: ; %entry 1607; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1608; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 1609; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1610; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 1611; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 1612; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 1613; GFX12-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 1614; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 1615; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 1616; GFX12-WGP-NEXT: s_endpgm 1617; 1618; GFX12-CU-LABEL: flat_system_acquire_atomicrmw: 1619; GFX12-CU: ; %bb.0: ; %entry 1620; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1621; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 1622; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1623; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 1624; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 1625; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 1626; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 1627; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 1628; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 1629; GFX12-CU-NEXT: s_endpgm 1630 ptr %out, i32 %in) { 1631entry: 1632 %val = atomicrmw volatile xchg ptr %out, i32 %in acquire 1633 ret void 1634} 1635 1636define amdgpu_kernel void @flat_system_release_atomicrmw( 1637; GFX7-LABEL: flat_system_release_atomicrmw: 1638; GFX7: ; %bb.0: ; %entry 1639; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1640; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1641; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1642; GFX7-NEXT: v_mov_b32_e32 v0, s6 1643; GFX7-NEXT: 
v_mov_b32_e32 v1, s7 1644; GFX7-NEXT: v_mov_b32_e32 v2, s4 1645; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1646; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1647; GFX7-NEXT: s_endpgm 1648; 1649; GFX10-WGP-LABEL: flat_system_release_atomicrmw: 1650; GFX10-WGP: ; %bb.0: ; %entry 1651; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1652; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x8 1653; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1654; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 1655; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 1656; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 1657; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1658; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1659; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 1660; GFX10-WGP-NEXT: s_endpgm 1661; 1662; GFX10-CU-LABEL: flat_system_release_atomicrmw: 1663; GFX10-CU: ; %bb.0: ; %entry 1664; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1665; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x8 1666; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1667; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 1668; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 1669; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 1670; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1671; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1672; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 1673; GFX10-CU-NEXT: s_endpgm 1674; 1675; SKIP-CACHE-INV-LABEL: flat_system_release_atomicrmw: 1676; SKIP-CACHE-INV: ; %bb.0: ; %entry 1677; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1678; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x2 1679; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1680; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1681; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 1682; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 1683; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1684; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 1685; SKIP-CACHE-INV-NEXT: s_endpgm 1686; 1687; GFX90A-NOTTGSPLIT-LABEL: flat_system_release_atomicrmw: 1688; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1689; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 
s[6:7], s[8:9], 0x0 1690; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 1691; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1692; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 1693; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 1694; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 1695; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1696; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 1697; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1698; 1699; GFX90A-TGSPLIT-LABEL: flat_system_release_atomicrmw: 1700; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1701; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1702; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 1703; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1704; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 1705; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 1706; GFX90A-TGSPLIT-NEXT: buffer_wbl2 1707; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1708; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 1709; GFX90A-TGSPLIT-NEXT: s_endpgm 1710; 1711; GFX940-NOTTGSPLIT-LABEL: flat_system_release_atomicrmw: 1712; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1713; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1714; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 1715; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1716; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 1717; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 1718; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1719; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1720; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 1721; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1722; 1723; GFX940-TGSPLIT-LABEL: flat_system_release_atomicrmw: 1724; GFX940-TGSPLIT: ; %bb.0: ; %entry 1725; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1726; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 1727; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1728; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 1729; 
GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 1730; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1731; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1732; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 1733; GFX940-TGSPLIT-NEXT: s_endpgm 1734; 1735; GFX11-WGP-LABEL: flat_system_release_atomicrmw: 1736; GFX11-WGP: ; %bb.0: ; %entry 1737; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1738; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 1739; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1740; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 1741; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 1742; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 1743; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1744; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1745; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 1746; GFX11-WGP-NEXT: s_endpgm 1747; 1748; GFX11-CU-LABEL: flat_system_release_atomicrmw: 1749; GFX11-CU: ; %bb.0: ; %entry 1750; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1751; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 1752; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1753; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 1754; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 1755; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 1756; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1757; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1758; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 1759; GFX11-CU-NEXT: s_endpgm 1760; 1761; GFX12-WGP-LABEL: flat_system_release_atomicrmw: 1762; GFX12-WGP: ; %bb.0: ; %entry 1763; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1764; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 1765; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 1766; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 1767; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 1768; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 1769; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 1770; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 1771; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 1772; GFX12-WGP-NEXT: s_wait_storecnt 0x0 1773; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 1774; GFX12-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 1775; 
GFX12-WGP-NEXT: s_endpgm 1776; 1777; GFX12-CU-LABEL: flat_system_release_atomicrmw: 1778; GFX12-CU: ; %bb.0: ; %entry 1779; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1780; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 1781; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1782; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 1783; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 1784; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 1785; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 1786; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 1787; GFX12-CU-NEXT: s_wait_samplecnt 0x0 1788; GFX12-CU-NEXT: s_wait_storecnt 0x0 1789; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 1790; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 1791; GFX12-CU-NEXT: s_endpgm 1792 ptr %out, i32 %in) { 1793entry: 1794 %val = atomicrmw volatile xchg ptr %out, i32 %in release 1795 ret void 1796} 1797 1798define amdgpu_kernel void @flat_system_acq_rel_atomicrmw( 1799; GFX7-LABEL: flat_system_acq_rel_atomicrmw: 1800; GFX7: ; %bb.0: ; %entry 1801; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1802; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1803; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1804; GFX7-NEXT: v_mov_b32_e32 v0, s6 1805; GFX7-NEXT: v_mov_b32_e32 v1, s7 1806; GFX7-NEXT: v_mov_b32_e32 v2, s4 1807; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1808; GFX7-NEXT: flat_atomic_swap v[0:1], v2 1809; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1810; GFX7-NEXT: buffer_wbinvl1_vol 1811; GFX7-NEXT: s_endpgm 1812; 1813; GFX10-WGP-LABEL: flat_system_acq_rel_atomicrmw: 1814; GFX10-WGP: ; %bb.0: ; %entry 1815; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1816; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x8 1817; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1818; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 1819; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 1820; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 1821; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1822; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1823; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 1824; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1825; 
GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1826; GFX10-WGP-NEXT: buffer_gl1_inv 1827; GFX10-WGP-NEXT: buffer_gl0_inv 1828; GFX10-WGP-NEXT: s_endpgm 1829; 1830; GFX10-CU-LABEL: flat_system_acq_rel_atomicrmw: 1831; GFX10-CU: ; %bb.0: ; %entry 1832; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1833; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x8 1834; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1835; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 1836; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 1837; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 1838; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1839; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1840; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 1841; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1842; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1843; GFX10-CU-NEXT: buffer_gl1_inv 1844; GFX10-CU-NEXT: buffer_gl0_inv 1845; GFX10-CU-NEXT: s_endpgm 1846; 1847; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_atomicrmw: 1848; SKIP-CACHE-INV: ; %bb.0: ; %entry 1849; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1850; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x2 1851; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1852; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1853; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 1854; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 1855; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1856; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 1857; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1858; SKIP-CACHE-INV-NEXT: s_endpgm 1859; 1860; GFX90A-NOTTGSPLIT-LABEL: flat_system_acq_rel_atomicrmw: 1861; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 1862; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1863; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 1864; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1865; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 1866; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 1867; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 1868; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1869; 
GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 1870; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1871; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 1872; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 1873; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 1874; 1875; GFX90A-TGSPLIT-LABEL: flat_system_acq_rel_atomicrmw: 1876; GFX90A-TGSPLIT: ; %bb.0: ; %entry 1877; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1878; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 1879; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1880; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 1881; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 1882; GFX90A-TGSPLIT-NEXT: buffer_wbl2 1883; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1884; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 1885; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1886; GFX90A-TGSPLIT-NEXT: buffer_invl2 1887; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 1888; GFX90A-TGSPLIT-NEXT: s_endpgm 1889; 1890; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_atomicrmw: 1891; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 1892; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1893; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 1894; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1895; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 1896; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 1897; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1898; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1899; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 1900; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1901; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 1902; GFX940-NOTTGSPLIT-NEXT: s_endpgm 1903; 1904; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_atomicrmw: 1905; GFX940-TGSPLIT: ; %bb.0: ; %entry 1906; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1907; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 1908; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 1909; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 
1910; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 1911; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 1912; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1913; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 1914; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 1915; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 1916; GFX940-TGSPLIT-NEXT: s_endpgm 1917; 1918; GFX11-WGP-LABEL: flat_system_acq_rel_atomicrmw: 1919; GFX11-WGP: ; %bb.0: ; %entry 1920; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1921; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 1922; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1923; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 1924; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 1925; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 1926; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1927; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1928; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 1929; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 1930; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1931; GFX11-WGP-NEXT: buffer_gl1_inv 1932; GFX11-WGP-NEXT: buffer_gl0_inv 1933; GFX11-WGP-NEXT: s_endpgm 1934; 1935; GFX11-CU-LABEL: flat_system_acq_rel_atomicrmw: 1936; GFX11-CU: ; %bb.0: ; %entry 1937; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1938; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 1939; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1940; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 1941; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 1942; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 1943; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1944; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1945; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 1946; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 1947; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 1948; GFX11-CU-NEXT: buffer_gl1_inv 1949; GFX11-CU-NEXT: buffer_gl0_inv 1950; GFX11-CU-NEXT: s_endpgm 1951; 1952; GFX12-WGP-LABEL: flat_system_acq_rel_atomicrmw: 1953; GFX12-WGP: ; %bb.0: ; %entry 1954; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1955; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 1956; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 
1957; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 1958; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 1959; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 1960; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 1961; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 1962; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 1963; GFX12-WGP-NEXT: s_wait_storecnt 0x0 1964; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 1965; GFX12-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 1966; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 1967; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 1968; GFX12-WGP-NEXT: s_endpgm 1969; 1970; GFX12-CU-LABEL: flat_system_acq_rel_atomicrmw: 1971; GFX12-CU: ; %bb.0: ; %entry 1972; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 1973; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 1974; GFX12-CU-NEXT: s_wait_kmcnt 0x0 1975; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 1976; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 1977; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 1978; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 1979; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 1980; GFX12-CU-NEXT: s_wait_samplecnt 0x0 1981; GFX12-CU-NEXT: s_wait_storecnt 0x0 1982; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 1983; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 1984; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 1985; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 1986; GFX12-CU-NEXT: s_endpgm 1987 ptr %out, i32 %in) { 1988entry: 1989 %val = atomicrmw volatile xchg ptr %out, i32 %in acq_rel 1990 ret void 1991} 1992 1993define amdgpu_kernel void @flat_system_seq_cst_atomicrmw( 1994; GFX7-LABEL: flat_system_seq_cst_atomicrmw: 1995; GFX7: ; %bb.0: ; %entry 1996; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 1997; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 1998; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1999; GFX7-NEXT: v_mov_b32_e32 v0, s6 2000; GFX7-NEXT: v_mov_b32_e32 v1, s7 2001; GFX7-NEXT: v_mov_b32_e32 v2, s4 2002; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2003; GFX7-NEXT: flat_atomic_swap v[0:1], v2 2004; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2005; GFX7-NEXT: 
buffer_wbinvl1_vol 2006; GFX7-NEXT: s_endpgm 2007; 2008; GFX10-WGP-LABEL: flat_system_seq_cst_atomicrmw: 2009; GFX10-WGP: ; %bb.0: ; %entry 2010; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 2011; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x8 2012; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2013; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 2014; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 2015; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 2016; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2017; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2018; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 2019; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2020; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2021; GFX10-WGP-NEXT: buffer_gl1_inv 2022; GFX10-WGP-NEXT: buffer_gl0_inv 2023; GFX10-WGP-NEXT: s_endpgm 2024; 2025; GFX10-CU-LABEL: flat_system_seq_cst_atomicrmw: 2026; GFX10-CU: ; %bb.0: ; %entry 2027; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 2028; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x8 2029; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2030; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 2031; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 2032; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 2033; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2034; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2035; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 2036; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2037; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2038; GFX10-CU-NEXT: buffer_gl1_inv 2039; GFX10-CU-NEXT: buffer_gl0_inv 2040; GFX10-CU-NEXT: s_endpgm 2041; 2042; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_atomicrmw: 2043; SKIP-CACHE-INV: ; %bb.0: ; %entry 2044; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2045; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x2 2046; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2047; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 2048; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 2049; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 2050; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2051; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 
2052; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2053; SKIP-CACHE-INV-NEXT: s_endpgm 2054; 2055; GFX90A-NOTTGSPLIT-LABEL: flat_system_seq_cst_atomicrmw: 2056; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2057; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 2058; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 2059; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2060; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 2061; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 2062; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 2063; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2064; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 2065; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2066; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2067; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2068; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2069; 2070; GFX90A-TGSPLIT-LABEL: flat_system_seq_cst_atomicrmw: 2071; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2072; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 2073; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 2074; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2075; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 2076; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 2077; GFX90A-TGSPLIT-NEXT: buffer_wbl2 2078; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2079; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 2080; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2081; GFX90A-TGSPLIT-NEXT: buffer_invl2 2082; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2083; GFX90A-TGSPLIT-NEXT: s_endpgm 2084; 2085; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_atomicrmw: 2086; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2087; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2088; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 2089; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2090; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 2091; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 2092; GFX940-NOTTGSPLIT-NEXT: 
buffer_wbl2 sc0 sc1 2093; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2094; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 2095; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2096; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 2097; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2098; 2099; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_atomicrmw: 2100; GFX940-TGSPLIT: ; %bb.0: ; %entry 2101; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2102; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 2103; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2104; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 2105; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 2106; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 2107; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2108; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 2109; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2110; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 2111; GFX940-TGSPLIT-NEXT: s_endpgm 2112; 2113; GFX11-WGP-LABEL: flat_system_seq_cst_atomicrmw: 2114; GFX11-WGP: ; %bb.0: ; %entry 2115; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 2116; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 2117; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2118; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 2119; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 2120; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 2121; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2122; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2123; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 2124; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2125; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2126; GFX11-WGP-NEXT: buffer_gl1_inv 2127; GFX11-WGP-NEXT: buffer_gl0_inv 2128; GFX11-WGP-NEXT: s_endpgm 2129; 2130; GFX11-CU-LABEL: flat_system_seq_cst_atomicrmw: 2131; GFX11-CU: ; %bb.0: ; %entry 2132; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 2133; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 2134; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2135; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 2136; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 2137; 
GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 2138; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2139; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2140; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 2141; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2142; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2143; GFX11-CU-NEXT: buffer_gl1_inv 2144; GFX11-CU-NEXT: buffer_gl0_inv 2145; GFX11-CU-NEXT: s_endpgm 2146; 2147; GFX12-WGP-LABEL: flat_system_seq_cst_atomicrmw: 2148; GFX12-WGP: ; %bb.0: ; %entry 2149; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 2150; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 2151; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2152; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 2153; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 2154; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 2155; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 2156; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 2157; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2158; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2159; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 2160; GFX12-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 2161; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 2162; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 2163; GFX12-WGP-NEXT: s_endpgm 2164; 2165; GFX12-CU-LABEL: flat_system_seq_cst_atomicrmw: 2166; GFX12-CU: ; %bb.0: ; %entry 2167; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 2168; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 2169; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2170; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 2171; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 2172; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 2173; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 2174; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2175; GFX12-CU-NEXT: s_wait_samplecnt 0x0 2176; GFX12-CU-NEXT: s_wait_storecnt 0x0 2177; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 2178; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 2179; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 2180; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 2181; GFX12-CU-NEXT: s_endpgm 2182 ptr %out, i32 %in) { 2183entry: 2184 %val = atomicrmw volatile xchg 
ptr %out, i32 %in seq_cst 2185 ret void 2186} 2187 2188define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw( 2189; GFX7-LABEL: flat_system_acquire_ret_atomicrmw: 2190; GFX7: ; %bb.0: ; %entry 2191; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2192; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 2193; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2194; GFX7-NEXT: v_mov_b32_e32 v0, s4 2195; GFX7-NEXT: v_mov_b32_e32 v1, s5 2196; GFX7-NEXT: v_mov_b32_e32 v2, s6 2197; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2198; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2199; GFX7-NEXT: buffer_wbinvl1_vol 2200; GFX7-NEXT: v_mov_b32_e32 v0, s4 2201; GFX7-NEXT: v_mov_b32_e32 v1, s5 2202; GFX7-NEXT: flat_store_dword v[0:1], v2 2203; GFX7-NEXT: s_endpgm 2204; 2205; GFX10-WGP-LABEL: flat_system_acquire_ret_atomicrmw: 2206; GFX10-WGP: ; %bb.0: ; %entry 2207; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2208; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2209; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2210; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 2211; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 2212; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s6 2213; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2214; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2215; GFX10-WGP-NEXT: buffer_gl1_inv 2216; GFX10-WGP-NEXT: buffer_gl0_inv 2217; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 2218; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 2219; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 2220; GFX10-WGP-NEXT: s_endpgm 2221; 2222; GFX10-CU-LABEL: flat_system_acquire_ret_atomicrmw: 2223; GFX10-CU: ; %bb.0: ; %entry 2224; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2225; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2226; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2227; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 2228; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 2229; GFX10-CU-NEXT: v_mov_b32_e32 v2, s6 2230; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2231; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2232; GFX10-CU-NEXT: buffer_gl1_inv 2233; GFX10-CU-NEXT: 
buffer_gl0_inv 2234; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 2235; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 2236; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 2237; GFX10-CU-NEXT: s_endpgm 2238; 2239; SKIP-CACHE-INV-LABEL: flat_system_acquire_ret_atomicrmw: 2240; SKIP-CACHE-INV: ; %bb.0: ; %entry 2241; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2242; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2 2243; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2244; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2245; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2246; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 2247; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2248; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2249; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2250; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2251; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 2252; SKIP-CACHE-INV-NEXT: s_endpgm 2253; 2254; GFX90A-NOTTGSPLIT-LABEL: flat_system_acquire_ret_atomicrmw: 2255; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2256; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2257; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2258; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2259; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2260; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s6 2261; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2262; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2263; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2264; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2265; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2266; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 2267; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2268; 2269; GFX90A-TGSPLIT-LABEL: flat_system_acquire_ret_atomicrmw: 2270; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2271; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2272; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2273; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2274; 
GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2275; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s6 2276; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2277; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2278; GFX90A-TGSPLIT-NEXT: buffer_invl2 2279; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2280; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2281; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 2282; GFX90A-TGSPLIT-NEXT: s_endpgm 2283; 2284; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_ret_atomicrmw: 2285; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2286; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2287; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2288; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2289; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2290; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 2291; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 2292; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2293; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 2294; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2295; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 2296; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2297; 2298; GFX940-TGSPLIT-LABEL: flat_system_acquire_ret_atomicrmw: 2299; GFX940-TGSPLIT: ; %bb.0: ; %entry 2300; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2301; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2302; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2303; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2304; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 2305; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 2306; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2307; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 2308; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2309; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 2310; GFX940-TGSPLIT-NEXT: s_endpgm 2311; 2312; GFX11-WGP-LABEL: flat_system_acquire_ret_atomicrmw: 2313; GFX11-WGP: ; 
%bb.0: ; %entry 2314; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2315; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2316; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2317; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 2318; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 2319; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 2320; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc 2321; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2322; GFX11-WGP-NEXT: buffer_gl1_inv 2323; GFX11-WGP-NEXT: buffer_gl0_inv 2324; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 2325; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 2326; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 2327; GFX11-WGP-NEXT: s_endpgm 2328; 2329; GFX11-CU-LABEL: flat_system_acquire_ret_atomicrmw: 2330; GFX11-CU: ; %bb.0: ; %entry 2331; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2332; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2333; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2334; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 2335; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 2336; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 2337; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc 2338; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2339; GFX11-CU-NEXT: buffer_gl1_inv 2340; GFX11-CU-NEXT: buffer_gl0_inv 2341; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 2342; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 2343; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 2344; GFX11-CU-NEXT: s_endpgm 2345; 2346; GFX12-WGP-LABEL: flat_system_acquire_ret_atomicrmw: 2347; GFX12-WGP: ; %bb.0: ; %entry 2348; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2349; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2350; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2351; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 2352; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 2353; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s2 2354; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 2355; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 2356; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 2357; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 2358; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 
2359; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 2360; GFX12-WGP-NEXT: s_endpgm 2361; 2362; GFX12-CU-LABEL: flat_system_acquire_ret_atomicrmw: 2363; GFX12-CU: ; %bb.0: ; %entry 2364; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2365; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2366; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2367; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 2368; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 2369; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2 2370; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 2371; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 2372; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 2373; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 2374; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 2375; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 2376; GFX12-CU-NEXT: s_endpgm 2377 ptr %out, i32 %in) { 2378entry: 2379 %val = atomicrmw volatile xchg ptr %out, i32 %in acquire 2380 store i32 %val, ptr %out, align 4 2381 ret void 2382} 2383 2384define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( 2385; GFX7-LABEL: flat_system_acq_rel_ret_atomicrmw: 2386; GFX7: ; %bb.0: ; %entry 2387; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2388; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 2389; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2390; GFX7-NEXT: v_mov_b32_e32 v0, s4 2391; GFX7-NEXT: v_mov_b32_e32 v1, s5 2392; GFX7-NEXT: v_mov_b32_e32 v2, s6 2393; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2394; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2395; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2396; GFX7-NEXT: buffer_wbinvl1_vol 2397; GFX7-NEXT: v_mov_b32_e32 v0, s4 2398; GFX7-NEXT: v_mov_b32_e32 v1, s5 2399; GFX7-NEXT: flat_store_dword v[0:1], v2 2400; GFX7-NEXT: s_endpgm 2401; 2402; GFX10-WGP-LABEL: flat_system_acq_rel_ret_atomicrmw: 2403; GFX10-WGP: ; %bb.0: ; %entry 2404; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2405; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2406; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2407; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 2408; GFX10-WGP-NEXT: 
v_mov_b32_e32 v1, s5 2409; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s6 2410; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2411; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2412; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2413; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2414; GFX10-WGP-NEXT: buffer_gl1_inv 2415; GFX10-WGP-NEXT: buffer_gl0_inv 2416; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 2417; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 2418; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 2419; GFX10-WGP-NEXT: s_endpgm 2420; 2421; GFX10-CU-LABEL: flat_system_acq_rel_ret_atomicrmw: 2422; GFX10-CU: ; %bb.0: ; %entry 2423; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2424; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2425; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2426; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 2427; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 2428; GFX10-CU-NEXT: v_mov_b32_e32 v2, s6 2429; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2430; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2431; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2432; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2433; GFX10-CU-NEXT: buffer_gl1_inv 2434; GFX10-CU-NEXT: buffer_gl0_inv 2435; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 2436; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 2437; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 2438; GFX10-CU-NEXT: s_endpgm 2439; 2440; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_ret_atomicrmw: 2441; SKIP-CACHE-INV: ; %bb.0: ; %entry 2442; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2443; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2 2444; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2445; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2446; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2447; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 2448; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2449; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2450; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2451; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2452; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v1, s1 2453; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 2454; SKIP-CACHE-INV-NEXT: s_endpgm 2455; 2456; GFX90A-NOTTGSPLIT-LABEL: flat_system_acq_rel_ret_atomicrmw: 2457; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2458; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2459; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2460; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2461; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2462; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s6 2463; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 2464; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2465; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2466; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2467; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2468; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2469; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2470; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 2471; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2472; 2473; GFX90A-TGSPLIT-LABEL: flat_system_acq_rel_ret_atomicrmw: 2474; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2475; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2476; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2477; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2478; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2479; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s6 2480; GFX90A-TGSPLIT-NEXT: buffer_wbl2 2481; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2482; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2483; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2484; GFX90A-TGSPLIT-NEXT: buffer_invl2 2485; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2486; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2487; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 2488; GFX90A-TGSPLIT-NEXT: s_endpgm 2489; 2490; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_ret_atomicrmw: 2491; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 
2492; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2493; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2494; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2495; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2496; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 2497; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 2498; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2499; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 2500; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2501; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 2502; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2503; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 2504; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2505; 2506; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_ret_atomicrmw: 2507; GFX940-TGSPLIT: ; %bb.0: ; %entry 2508; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2509; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2510; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2511; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2512; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 2513; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 2514; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2515; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 2516; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2517; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 2518; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2519; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 2520; GFX940-TGSPLIT-NEXT: s_endpgm 2521; 2522; GFX11-WGP-LABEL: flat_system_acq_rel_ret_atomicrmw: 2523; GFX11-WGP: ; %bb.0: ; %entry 2524; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2525; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2526; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2527; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 2528; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 2529; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 2530; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2531; GFX11-WGP-NEXT: s_waitcnt_vscnt 
null, 0x0 2532; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc 2533; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2534; GFX11-WGP-NEXT: buffer_gl1_inv 2535; GFX11-WGP-NEXT: buffer_gl0_inv 2536; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 2537; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 2538; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 2539; GFX11-WGP-NEXT: s_endpgm 2540; 2541; GFX11-CU-LABEL: flat_system_acq_rel_ret_atomicrmw: 2542; GFX11-CU: ; %bb.0: ; %entry 2543; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2544; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2545; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2546; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 2547; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 2548; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 2549; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2550; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2551; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc 2552; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2553; GFX11-CU-NEXT: buffer_gl1_inv 2554; GFX11-CU-NEXT: buffer_gl0_inv 2555; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 2556; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 2557; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 2558; GFX11-CU-NEXT: s_endpgm 2559; 2560; GFX12-WGP-LABEL: flat_system_acq_rel_ret_atomicrmw: 2561; GFX12-WGP: ; %bb.0: ; %entry 2562; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2563; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2564; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2565; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 2566; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 2567; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s2 2568; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 2569; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 2570; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2571; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2572; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 2573; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 2574; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 2575; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2576; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 2577; 
GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 2578; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 2579; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 2580; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 2581; GFX12-WGP-NEXT: s_endpgm 2582; 2583; GFX12-CU-LABEL: flat_system_acq_rel_ret_atomicrmw: 2584; GFX12-CU: ; %bb.0: ; %entry 2585; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2586; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2587; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2588; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 2589; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 2590; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2 2591; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 2592; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2593; GFX12-CU-NEXT: s_wait_samplecnt 0x0 2594; GFX12-CU-NEXT: s_wait_storecnt 0x0 2595; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 2596; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 2597; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2598; GFX12-CU-NEXT: s_wait_samplecnt 0x0 2599; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 2600; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 2601; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 2602; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 2603; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 2604; GFX12-CU-NEXT: s_endpgm 2605 ptr %out, i32 %in) { 2606entry: 2607 %val = atomicrmw volatile xchg ptr %out, i32 %in acq_rel 2608 store i32 %val, ptr %out, align 4 2609 ret void 2610} 2611 2612define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( 2613; GFX7-LABEL: flat_system_seq_cst_ret_atomicrmw: 2614; GFX7: ; %bb.0: ; %entry 2615; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2616; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 2617; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2618; GFX7-NEXT: v_mov_b32_e32 v0, s4 2619; GFX7-NEXT: v_mov_b32_e32 v1, s5 2620; GFX7-NEXT: v_mov_b32_e32 v2, s6 2621; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2622; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2623; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2624; GFX7-NEXT: buffer_wbinvl1_vol 2625; GFX7-NEXT: v_mov_b32_e32 v0, s4 
2626; GFX7-NEXT: v_mov_b32_e32 v1, s5 2627; GFX7-NEXT: flat_store_dword v[0:1], v2 2628; GFX7-NEXT: s_endpgm 2629; 2630; GFX10-WGP-LABEL: flat_system_seq_cst_ret_atomicrmw: 2631; GFX10-WGP: ; %bb.0: ; %entry 2632; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2633; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 2634; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2635; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 2636; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 2637; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s6 2638; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2639; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2640; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2641; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2642; GFX10-WGP-NEXT: buffer_gl1_inv 2643; GFX10-WGP-NEXT: buffer_gl0_inv 2644; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 2645; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 2646; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 2647; GFX10-WGP-NEXT: s_endpgm 2648; 2649; GFX10-CU-LABEL: flat_system_seq_cst_ret_atomicrmw: 2650; GFX10-CU: ; %bb.0: ; %entry 2651; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2652; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 2653; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2654; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 2655; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 2656; GFX10-CU-NEXT: v_mov_b32_e32 v2, s6 2657; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2658; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2659; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2660; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2661; GFX10-CU-NEXT: buffer_gl1_inv 2662; GFX10-CU-NEXT: buffer_gl0_inv 2663; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 2664; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 2665; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 2666; GFX10-CU-NEXT: s_endpgm 2667; 2668; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_ret_atomicrmw: 2669; SKIP-CACHE-INV: ; %bb.0: ; %entry 2670; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2671; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2 2672; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2673; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2674; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2675; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 2676; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2677; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2678; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2679; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2680; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2681; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 2682; SKIP-CACHE-INV-NEXT: s_endpgm 2683; 2684; GFX90A-NOTTGSPLIT-LABEL: flat_system_seq_cst_ret_atomicrmw: 2685; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2686; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2687; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2688; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2689; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2690; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s6 2691; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 2692; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2693; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 2694; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2695; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 2696; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 2697; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2698; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 2699; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2700; 2701; GFX90A-TGSPLIT-LABEL: flat_system_seq_cst_ret_atomicrmw: 2702; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2703; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2704; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 2705; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2706; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2707; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s6 2708; GFX90A-TGSPLIT-NEXT: buffer_wbl2 2709; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2710; GFX90A-TGSPLIT-NEXT: 
flat_atomic_swap v2, v[0:1], v2 glc 2711; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2712; GFX90A-TGSPLIT-NEXT: buffer_invl2 2713; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 2714; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2715; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 2716; GFX90A-TGSPLIT-NEXT: s_endpgm 2717; 2718; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_ret_atomicrmw: 2719; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2720; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2721; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2722; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2723; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2724; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 2725; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 2726; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2727; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 2728; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2729; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 2730; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2731; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 2732; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2733; 2734; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_ret_atomicrmw: 2735; GFX940-TGSPLIT: ; %bb.0: ; %entry 2736; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2737; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 2738; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2739; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2740; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 2741; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 2742; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2743; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 2744; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 2745; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 2746; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2747; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 2748; GFX940-TGSPLIT-NEXT: s_endpgm 2749; 2750; 
GFX11-WGP-LABEL: flat_system_seq_cst_ret_atomicrmw: 2751; GFX11-WGP: ; %bb.0: ; %entry 2752; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2753; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2754; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 2755; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 2756; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 2757; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 2758; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2759; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2760; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc 2761; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2762; GFX11-WGP-NEXT: buffer_gl1_inv 2763; GFX11-WGP-NEXT: buffer_gl0_inv 2764; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 2765; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 2766; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 2767; GFX11-WGP-NEXT: s_endpgm 2768; 2769; GFX11-CU-LABEL: flat_system_seq_cst_ret_atomicrmw: 2770; GFX11-CU: ; %bb.0: ; %entry 2771; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2772; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2773; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 2774; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 2775; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 2776; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 2777; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2778; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 2779; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc 2780; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2781; GFX11-CU-NEXT: buffer_gl1_inv 2782; GFX11-CU-NEXT: buffer_gl0_inv 2783; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 2784; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 2785; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 2786; GFX11-CU-NEXT: s_endpgm 2787; 2788; GFX12-WGP-LABEL: flat_system_seq_cst_ret_atomicrmw: 2789; GFX12-WGP: ; %bb.0: ; %entry 2790; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2791; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 2792; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 2793; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 2794; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 2795; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s2 2796; 
GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 2797; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 2798; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2799; GFX12-WGP-NEXT: s_wait_storecnt 0x0 2800; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 2801; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 2802; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 2803; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 2804; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 2805; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 2806; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 2807; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 2808; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 2809; GFX12-WGP-NEXT: s_endpgm 2810; 2811; GFX12-CU-LABEL: flat_system_seq_cst_ret_atomicrmw: 2812; GFX12-CU: ; %bb.0: ; %entry 2813; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 2814; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 2815; GFX12-CU-NEXT: s_wait_kmcnt 0x0 2816; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 2817; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 2818; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2 2819; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 2820; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2821; GFX12-CU-NEXT: s_wait_samplecnt 0x0 2822; GFX12-CU-NEXT: s_wait_storecnt 0x0 2823; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 2824; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 2825; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 2826; GFX12-CU-NEXT: s_wait_samplecnt 0x0 2827; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 2828; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 2829; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 2830; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 2831; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 2832; GFX12-CU-NEXT: s_endpgm 2833 ptr %out, i32 %in) { 2834entry: 2835 %val = atomicrmw volatile xchg ptr %out, i32 %in seq_cst 2836 store i32 %val, ptr %out, align 4 2837 ret void 2838} 2839 2840define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg( 2841; GFX7-LABEL: flat_system_monotonic_monotonic_cmpxchg: 2842; GFX7: ; %bb.0: ; %entry 2843; GFX7-NEXT: 
s_mov_b64 s[4:5], s[8:9] 2844; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 2845; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 2846; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 2847; GFX7-NEXT: s_mov_b64 s[10:11], 16 2848; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2849; GFX7-NEXT: s_mov_b32 s4, s8 2850; GFX7-NEXT: s_mov_b32 s5, s9 2851; GFX7-NEXT: s_mov_b32 s9, s10 2852; GFX7-NEXT: s_mov_b32 s8, s11 2853; GFX7-NEXT: s_add_u32 s4, s4, s9 2854; GFX7-NEXT: s_addc_u32 s8, s5, s8 2855; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 2856; GFX7-NEXT: s_mov_b32 s5, s8 2857; GFX7-NEXT: v_mov_b32_e32 v2, s7 2858; GFX7-NEXT: v_mov_b32_e32 v0, s6 2859; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2860; GFX7-NEXT: v_mov_b32_e32 v3, v0 2861; GFX7-NEXT: v_mov_b32_e32 v0, s4 2862; GFX7-NEXT: v_mov_b32_e32 v1, s5 2863; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 2864; GFX7-NEXT: s_endpgm 2865; 2866; GFX10-WGP-LABEL: flat_system_monotonic_monotonic_cmpxchg: 2867; GFX10-WGP: ; %bb.0: ; %entry 2868; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 2869; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 2870; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 2871; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 2872; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 2873; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2874; GFX10-WGP-NEXT: s_mov_b32 s4, s8 2875; GFX10-WGP-NEXT: s_mov_b32 s5, s9 2876; GFX10-WGP-NEXT: s_mov_b32 s9, s10 2877; GFX10-WGP-NEXT: s_mov_b32 s8, s11 2878; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 2879; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 2880; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 2881; GFX10-WGP-NEXT: s_mov_b32 s5, s8 2882; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 2883; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 2884; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2885; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 2886; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 2887; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 2888; GFX10-WGP-NEXT: 
flat_atomic_cmpswap v[0:1], v[2:3] 2889; GFX10-WGP-NEXT: s_endpgm 2890; 2891; GFX10-CU-LABEL: flat_system_monotonic_monotonic_cmpxchg: 2892; GFX10-CU: ; %bb.0: ; %entry 2893; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 2894; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 2895; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 2896; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 2897; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 2898; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2899; GFX10-CU-NEXT: s_mov_b32 s4, s8 2900; GFX10-CU-NEXT: s_mov_b32 s5, s9 2901; GFX10-CU-NEXT: s_mov_b32 s9, s10 2902; GFX10-CU-NEXT: s_mov_b32 s8, s11 2903; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 2904; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 2905; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 2906; GFX10-CU-NEXT: s_mov_b32 s5, s8 2907; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 2908; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 2909; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2910; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 2911; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 2912; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 2913; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 2914; GFX10-CU-NEXT: s_endpgm 2915; 2916; SKIP-CACHE-INV-LABEL: flat_system_monotonic_monotonic_cmpxchg: 2917; SKIP-CACHE-INV: ; %bb.0: ; %entry 2918; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 2919; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 2920; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 2921; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 2922; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 2923; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2924; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 2925; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 2926; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 2927; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 2928; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 2929; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 2930; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 2931; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s1, s4 2932; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 2933; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 2934; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2935; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 2936; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2937; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2938; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 2939; SKIP-CACHE-INV-NEXT: s_endpgm 2940; 2941; GFX90A-NOTTGSPLIT-LABEL: flat_system_monotonic_monotonic_cmpxchg: 2942; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 2943; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2944; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 2945; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 2946; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2947; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 2948; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 2949; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2950; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 2951; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2952; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 2953; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 2954; 2955; GFX90A-TGSPLIT-LABEL: flat_system_monotonic_monotonic_cmpxchg: 2956; GFX90A-TGSPLIT: ; %bb.0: ; %entry 2957; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 2958; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 2959; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 2960; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2961; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 2962; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 2963; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2964; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 2965; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 2966; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 2967; GFX90A-TGSPLIT-NEXT: s_endpgm 2968; 
2969; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_monotonic_cmpxchg: 2970; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 2971; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2972; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 2973; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 2974; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2975; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 2976; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 2977; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2978; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 2979; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2980; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 2981; GFX940-NOTTGSPLIT-NEXT: s_endpgm 2982; 2983; GFX940-TGSPLIT-LABEL: flat_system_monotonic_monotonic_cmpxchg: 2984; GFX940-TGSPLIT: ; %bb.0: ; %entry 2985; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2986; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 2987; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 2988; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 2989; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 2990; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 2991; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2992; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 2993; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 2994; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 2995; GFX940-TGSPLIT-NEXT: s_endpgm 2996; 2997; GFX11-WGP-LABEL: flat_system_monotonic_monotonic_cmpxchg: 2998; GFX11-WGP: ; %bb.0: ; %entry 2999; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3000; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3001; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3002; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3003; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 3004; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 3005; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3006; 
GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 3007; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 3008; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 3009; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 3010; GFX11-WGP-NEXT: s_endpgm 3011; 3012; GFX11-CU-LABEL: flat_system_monotonic_monotonic_cmpxchg: 3013; GFX11-CU: ; %bb.0: ; %entry 3014; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3015; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3016; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3017; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3018; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 3019; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 3020; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3021; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 3022; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 3023; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 3024; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 3025; GFX11-CU-NEXT: s_endpgm 3026; 3027; GFX12-WGP-LABEL: flat_system_monotonic_monotonic_cmpxchg: 3028; GFX12-WGP: ; %bb.0: ; %entry 3029; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3030; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3031; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3032; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3033; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 3034; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 3035; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3036; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 3037; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 3038; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 3039; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 3040; GFX12-WGP-NEXT: s_endpgm 3041; 3042; GFX12-CU-LABEL: flat_system_monotonic_monotonic_cmpxchg: 3043; GFX12-CU: ; %bb.0: ; %entry 3044; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3045; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3046; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3047; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3048; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 3049; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 
3050; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3051; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 3052; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 3053; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 3054; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 3055; GFX12-CU-NEXT: s_endpgm 3056 ptr %out, i32 %in, i32 %old) { 3057entry: 3058 %gep = getelementptr i32, ptr %out, i32 4 3059 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic monotonic 3060 ret void 3061} 3062 3063define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg( 3064; GFX7-LABEL: flat_system_acquire_monotonic_cmpxchg: 3065; GFX7: ; %bb.0: ; %entry 3066; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3067; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3068; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3069; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3070; GFX7-NEXT: s_mov_b64 s[10:11], 16 3071; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3072; GFX7-NEXT: s_mov_b32 s4, s8 3073; GFX7-NEXT: s_mov_b32 s5, s9 3074; GFX7-NEXT: s_mov_b32 s9, s10 3075; GFX7-NEXT: s_mov_b32 s8, s11 3076; GFX7-NEXT: s_add_u32 s4, s4, s9 3077; GFX7-NEXT: s_addc_u32 s8, s5, s8 3078; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3079; GFX7-NEXT: s_mov_b32 s5, s8 3080; GFX7-NEXT: v_mov_b32_e32 v2, s7 3081; GFX7-NEXT: v_mov_b32_e32 v0, s6 3082; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3083; GFX7-NEXT: v_mov_b32_e32 v3, v0 3084; GFX7-NEXT: v_mov_b32_e32 v0, s4 3085; GFX7-NEXT: v_mov_b32_e32 v1, s5 3086; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3087; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3088; GFX7-NEXT: buffer_wbinvl1_vol 3089; GFX7-NEXT: s_endpgm 3090; 3091; GFX10-WGP-LABEL: flat_system_acquire_monotonic_cmpxchg: 3092; GFX10-WGP: ; %bb.0: ; %entry 3093; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 3094; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3095; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 3096; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 
3097; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 3098; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3099; GFX10-WGP-NEXT: s_mov_b32 s4, s8 3100; GFX10-WGP-NEXT: s_mov_b32 s5, s9 3101; GFX10-WGP-NEXT: s_mov_b32 s9, s10 3102; GFX10-WGP-NEXT: s_mov_b32 s8, s11 3103; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 3104; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 3105; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3106; GFX10-WGP-NEXT: s_mov_b32 s5, s8 3107; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 3108; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 3109; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3110; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 3111; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 3112; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 3113; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3114; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3115; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3116; GFX10-WGP-NEXT: buffer_gl1_inv 3117; GFX10-WGP-NEXT: buffer_gl0_inv 3118; GFX10-WGP-NEXT: s_endpgm 3119; 3120; GFX10-CU-LABEL: flat_system_acquire_monotonic_cmpxchg: 3121; GFX10-CU: ; %bb.0: ; %entry 3122; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 3123; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3124; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 3125; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 3126; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 3127; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3128; GFX10-CU-NEXT: s_mov_b32 s4, s8 3129; GFX10-CU-NEXT: s_mov_b32 s5, s9 3130; GFX10-CU-NEXT: s_mov_b32 s9, s10 3131; GFX10-CU-NEXT: s_mov_b32 s8, s11 3132; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 3133; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 3134; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3135; GFX10-CU-NEXT: s_mov_b32 s5, s8 3136; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 3137; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 3138; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3139; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 3140; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 3141; 
GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 3142; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3143; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3144; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3145; GFX10-CU-NEXT: buffer_gl1_inv 3146; GFX10-CU-NEXT: buffer_gl0_inv 3147; GFX10-CU-NEXT: s_endpgm 3148; 3149; SKIP-CACHE-INV-LABEL: flat_system_acquire_monotonic_cmpxchg: 3150; SKIP-CACHE-INV: ; %bb.0: ; %entry 3151; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 3152; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 3153; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 3154; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 3155; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 3156; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3157; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 3158; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 3159; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 3160; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 3161; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 3162; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 3163; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 3164; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 3165; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 3166; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 3167; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3168; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 3169; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3170; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3171; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3172; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3173; SKIP-CACHE-INV-NEXT: s_endpgm 3174; 3175; GFX90A-NOTTGSPLIT-LABEL: flat_system_acquire_monotonic_cmpxchg: 3176; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3177; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3178; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3179; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3180; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3181; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 
s7 3182; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 3183; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3184; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 3185; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 3186; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 3187; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3188; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 3189; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3190; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3191; 3192; GFX90A-TGSPLIT-LABEL: flat_system_acquire_monotonic_cmpxchg: 3193; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3194; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3195; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3196; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3197; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3198; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3199; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 3200; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3201; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 3202; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 3203; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 3204; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3205; GFX90A-TGSPLIT-NEXT: buffer_invl2 3206; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3207; GFX90A-TGSPLIT-NEXT: s_endpgm 3208; 3209; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_monotonic_cmpxchg: 3210; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3211; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3212; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3213; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3214; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3215; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3216; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 3217; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3218; 
GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 3219; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 3220; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 3221; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3222; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 3223; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3224; 3225; GFX940-TGSPLIT-LABEL: flat_system_acquire_monotonic_cmpxchg: 3226; GFX940-TGSPLIT: ; %bb.0: ; %entry 3227; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3228; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3229; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3230; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3231; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3232; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 3233; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3234; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 3235; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 3236; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 3237; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3238; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 3239; GFX940-TGSPLIT-NEXT: s_endpgm 3240; 3241; GFX11-WGP-LABEL: flat_system_acquire_monotonic_cmpxchg: 3242; GFX11-WGP: ; %bb.0: ; %entry 3243; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3244; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3245; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3246; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3247; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 3248; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 3249; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3250; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 3251; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 3252; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 3253; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 3254; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3255; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3256; GFX11-WGP-NEXT: buffer_gl1_inv 3257; GFX11-WGP-NEXT: buffer_gl0_inv 3258; 
GFX11-WGP-NEXT: s_endpgm 3259; 3260; GFX11-CU-LABEL: flat_system_acquire_monotonic_cmpxchg: 3261; GFX11-CU: ; %bb.0: ; %entry 3262; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3263; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3264; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3265; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3266; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 3267; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 3268; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3269; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 3270; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 3271; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 3272; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 3273; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3274; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 3275; GFX11-CU-NEXT: buffer_gl1_inv 3276; GFX11-CU-NEXT: buffer_gl0_inv 3277; GFX11-CU-NEXT: s_endpgm 3278; 3279; GFX12-WGP-LABEL: flat_system_acquire_monotonic_cmpxchg: 3280; GFX12-WGP: ; %bb.0: ; %entry 3281; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3282; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3283; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3284; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3285; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 3286; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 3287; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3288; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 3289; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 3290; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 3291; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 3292; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 3293; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 3294; GFX12-WGP-NEXT: s_endpgm 3295; 3296; GFX12-CU-LABEL: flat_system_acquire_monotonic_cmpxchg: 3297; GFX12-CU: ; %bb.0: ; %entry 3298; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3299; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3300; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3301; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3302; GFX12-CU-NEXT: 
v_mov_b32_e32 v2, s3 3303; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 3304; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3305; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 3306; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 3307; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 3308; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 3309; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 3310; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 3311; GFX12-CU-NEXT: s_endpgm 3312 ptr %out, i32 %in, i32 %old) { 3313entry: 3314 %gep = getelementptr i32, ptr %out, i32 4 3315 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire monotonic 3316 ret void 3317} 3318 3319define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg( 3320; GFX7-LABEL: flat_system_release_monotonic_cmpxchg: 3321; GFX7: ; %bb.0: ; %entry 3322; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3323; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3324; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3325; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3326; GFX7-NEXT: s_mov_b64 s[10:11], 16 3327; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3328; GFX7-NEXT: s_mov_b32 s4, s8 3329; GFX7-NEXT: s_mov_b32 s5, s9 3330; GFX7-NEXT: s_mov_b32 s9, s10 3331; GFX7-NEXT: s_mov_b32 s8, s11 3332; GFX7-NEXT: s_add_u32 s4, s4, s9 3333; GFX7-NEXT: s_addc_u32 s8, s5, s8 3334; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3335; GFX7-NEXT: s_mov_b32 s5, s8 3336; GFX7-NEXT: v_mov_b32_e32 v2, s7 3337; GFX7-NEXT: v_mov_b32_e32 v0, s6 3338; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3339; GFX7-NEXT: v_mov_b32_e32 v3, v0 3340; GFX7-NEXT: v_mov_b32_e32 v0, s4 3341; GFX7-NEXT: v_mov_b32_e32 v1, s5 3342; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3343; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3344; GFX7-NEXT: s_endpgm 3345; 3346; GFX10-WGP-LABEL: flat_system_release_monotonic_cmpxchg: 3347; GFX10-WGP: ; %bb.0: ; %entry 3348; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 3349; GFX10-WGP-NEXT: s_load_dwordx2 
s[8:9], s[4:5], 0x0 3350; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 3351; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 3352; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 3353; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3354; GFX10-WGP-NEXT: s_mov_b32 s4, s8 3355; GFX10-WGP-NEXT: s_mov_b32 s5, s9 3356; GFX10-WGP-NEXT: s_mov_b32 s9, s10 3357; GFX10-WGP-NEXT: s_mov_b32 s8, s11 3358; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 3359; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 3360; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3361; GFX10-WGP-NEXT: s_mov_b32 s5, s8 3362; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 3363; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 3364; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3365; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 3366; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 3367; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 3368; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3369; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3370; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3371; GFX10-WGP-NEXT: s_endpgm 3372; 3373; GFX10-CU-LABEL: flat_system_release_monotonic_cmpxchg: 3374; GFX10-CU: ; %bb.0: ; %entry 3375; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 3376; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3377; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 3378; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 3379; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 3380; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3381; GFX10-CU-NEXT: s_mov_b32 s4, s8 3382; GFX10-CU-NEXT: s_mov_b32 s5, s9 3383; GFX10-CU-NEXT: s_mov_b32 s9, s10 3384; GFX10-CU-NEXT: s_mov_b32 s8, s11 3385; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 3386; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 3387; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3388; GFX10-CU-NEXT: s_mov_b32 s5, s8 3389; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 3390; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 3391; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3392; GFX10-CU-NEXT: v_mov_b32_e32 v3, 
v0 3393; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 3394; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 3395; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3396; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3397; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3398; GFX10-CU-NEXT: s_endpgm 3399; 3400; SKIP-CACHE-INV-LABEL: flat_system_release_monotonic_cmpxchg: 3401; SKIP-CACHE-INV: ; %bb.0: ; %entry 3402; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 3403; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 3404; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 3405; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 3406; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 3407; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3408; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 3409; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 3410; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 3411; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 3412; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 3413; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 3414; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 3415; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 3416; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 3417; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 3418; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3419; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 3420; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3421; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3422; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3423; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3424; SKIP-CACHE-INV-NEXT: s_endpgm 3425; 3426; GFX90A-NOTTGSPLIT-LABEL: flat_system_release_monotonic_cmpxchg: 3427; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3428; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3429; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3430; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3431; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3432; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3433; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 3434; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3435; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 3436; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 3437; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 3438; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3439; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 3440; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3441; 3442; GFX90A-TGSPLIT-LABEL: flat_system_release_monotonic_cmpxchg: 3443; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3444; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3445; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3446; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3447; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3448; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3449; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 3450; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3451; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 3452; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 3453; GFX90A-TGSPLIT-NEXT: buffer_wbl2 3454; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3455; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 3456; GFX90A-TGSPLIT-NEXT: s_endpgm 3457; 3458; GFX940-NOTTGSPLIT-LABEL: flat_system_release_monotonic_cmpxchg: 3459; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3460; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3461; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3462; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3463; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3464; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3465; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 3466; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3467; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 3468; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 3469; 
GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 3470; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3471; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 3472; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3473; 3474; GFX940-TGSPLIT-LABEL: flat_system_release_monotonic_cmpxchg: 3475; GFX940-TGSPLIT: ; %bb.0: ; %entry 3476; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3477; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3478; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3479; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3480; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3481; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 3482; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3483; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 3484; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 3485; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 3486; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3487; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 3488; GFX940-TGSPLIT-NEXT: s_endpgm 3489; 3490; GFX11-WGP-LABEL: flat_system_release_monotonic_cmpxchg: 3491; GFX11-WGP: ; %bb.0: ; %entry 3492; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3493; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3494; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3495; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3496; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 3497; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 3498; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3499; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 3500; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 3501; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 3502; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3503; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3504; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 3505; GFX11-WGP-NEXT: s_endpgm 3506; 3507; GFX11-CU-LABEL: flat_system_release_monotonic_cmpxchg: 3508; GFX11-CU: ; %bb.0: ; %entry 3509; GFX11-CU-NEXT: s_load_b64 
s[0:1], s[4:5], 0x0 3510; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3511; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3512; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3513; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 3514; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 3515; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3516; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 3517; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 3518; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 3519; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3520; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 3521; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 3522; GFX11-CU-NEXT: s_endpgm 3523; 3524; GFX12-WGP-LABEL: flat_system_release_monotonic_cmpxchg: 3525; GFX12-WGP: ; %bb.0: ; %entry 3526; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3527; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3528; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3529; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3530; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 3531; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 3532; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3533; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 3534; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 3535; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 3536; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 3537; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 3538; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 3539; GFX12-WGP-NEXT: s_wait_storecnt 0x0 3540; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 3541; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 3542; GFX12-WGP-NEXT: s_endpgm 3543; 3544; GFX12-CU-LABEL: flat_system_release_monotonic_cmpxchg: 3545; GFX12-CU: ; %bb.0: ; %entry 3546; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3547; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3548; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3549; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3550; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 3551; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 3552; GFX12-CU-NEXT: ; kill: def $vgpr2 killed 
$vgpr2 def $vgpr2_vgpr3 killed $exec 3553; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 3554; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 3555; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 3556; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 3557; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 3558; GFX12-CU-NEXT: s_wait_samplecnt 0x0 3559; GFX12-CU-NEXT: s_wait_storecnt 0x0 3560; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 3561; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 3562; GFX12-CU-NEXT: s_endpgm 3563 ptr %out, i32 %in, i32 %old) { 3564entry: 3565 %gep = getelementptr i32, ptr %out, i32 4 3566 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release monotonic 3567 ret void 3568} 3569 3570define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg( 3571; GFX7-LABEL: flat_system_acq_rel_monotonic_cmpxchg: 3572; GFX7: ; %bb.0: ; %entry 3573; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3574; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3575; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3576; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3577; GFX7-NEXT: s_mov_b64 s[10:11], 16 3578; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3579; GFX7-NEXT: s_mov_b32 s4, s8 3580; GFX7-NEXT: s_mov_b32 s5, s9 3581; GFX7-NEXT: s_mov_b32 s9, s10 3582; GFX7-NEXT: s_mov_b32 s8, s11 3583; GFX7-NEXT: s_add_u32 s4, s4, s9 3584; GFX7-NEXT: s_addc_u32 s8, s5, s8 3585; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3586; GFX7-NEXT: s_mov_b32 s5, s8 3587; GFX7-NEXT: v_mov_b32_e32 v2, s7 3588; GFX7-NEXT: v_mov_b32_e32 v0, s6 3589; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3590; GFX7-NEXT: v_mov_b32_e32 v3, v0 3591; GFX7-NEXT: v_mov_b32_e32 v0, s4 3592; GFX7-NEXT: v_mov_b32_e32 v1, s5 3593; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3594; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3595; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3596; GFX7-NEXT: buffer_wbinvl1_vol 3597; GFX7-NEXT: s_endpgm 3598; 3599; GFX10-WGP-LABEL: flat_system_acq_rel_monotonic_cmpxchg: 3600; GFX10-WGP: ; %bb.0: ; 
%entry 3601; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 3602; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3603; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 3604; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 3605; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 3606; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3607; GFX10-WGP-NEXT: s_mov_b32 s4, s8 3608; GFX10-WGP-NEXT: s_mov_b32 s5, s9 3609; GFX10-WGP-NEXT: s_mov_b32 s9, s10 3610; GFX10-WGP-NEXT: s_mov_b32 s8, s11 3611; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 3612; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 3613; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3614; GFX10-WGP-NEXT: s_mov_b32 s5, s8 3615; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 3616; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 3617; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3618; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 3619; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 3620; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 3621; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3622; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3623; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3624; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3625; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3626; GFX10-WGP-NEXT: buffer_gl1_inv 3627; GFX10-WGP-NEXT: buffer_gl0_inv 3628; GFX10-WGP-NEXT: s_endpgm 3629; 3630; GFX10-CU-LABEL: flat_system_acq_rel_monotonic_cmpxchg: 3631; GFX10-CU: ; %bb.0: ; %entry 3632; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 3633; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3634; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 3635; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 3636; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 3637; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3638; GFX10-CU-NEXT: s_mov_b32 s4, s8 3639; GFX10-CU-NEXT: s_mov_b32 s5, s9 3640; GFX10-CU-NEXT: s_mov_b32 s9, s10 3641; GFX10-CU-NEXT: s_mov_b32 s8, s11 3642; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 3643; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 3644; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def 
$sgpr4_sgpr5 3645; GFX10-CU-NEXT: s_mov_b32 s5, s8 3646; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 3647; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 3648; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3649; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 3650; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 3651; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 3652; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3653; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3654; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3655; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3656; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3657; GFX10-CU-NEXT: buffer_gl1_inv 3658; GFX10-CU-NEXT: buffer_gl0_inv 3659; GFX10-CU-NEXT: s_endpgm 3660; 3661; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_monotonic_cmpxchg: 3662; SKIP-CACHE-INV: ; %bb.0: ; %entry 3663; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 3664; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 3665; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 3666; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 3667; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 3668; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3669; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 3670; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 3671; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 3672; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 3673; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 3674; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 3675; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 3676; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 3677; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 3678; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 3679; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3680; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 3681; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3682; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3683; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3684; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3685; SKIP-CACHE-INV-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 3686; SKIP-CACHE-INV-NEXT: s_endpgm 3687; 3688; GFX90A-NOTTGSPLIT-LABEL: flat_system_acq_rel_monotonic_cmpxchg: 3689; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3690; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3691; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3692; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3693; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3694; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3695; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 3696; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3697; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 3698; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 3699; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 3700; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3701; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 3702; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3703; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 3704; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3705; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3706; 3707; GFX90A-TGSPLIT-LABEL: flat_system_acq_rel_monotonic_cmpxchg: 3708; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3709; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3710; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3711; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3712; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3713; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3714; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 3715; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3716; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 3717; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 3718; GFX90A-TGSPLIT-NEXT: buffer_wbl2 3719; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3720; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 3721; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3722; GFX90A-TGSPLIT-NEXT: 
buffer_invl2 3723; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 3724; GFX90A-TGSPLIT-NEXT: s_endpgm 3725; 3726; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_monotonic_cmpxchg: 3727; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 3728; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3729; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3730; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3731; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3732; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3733; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 3734; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3735; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 3736; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 3737; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 3738; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3739; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 3740; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3741; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 3742; GFX940-NOTTGSPLIT-NEXT: s_endpgm 3743; 3744; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_monotonic_cmpxchg: 3745; GFX940-TGSPLIT: ; %bb.0: ; %entry 3746; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3747; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 3748; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 3749; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3750; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 3751; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 3752; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3753; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 3754; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 3755; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 3756; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3757; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 3758; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 3759; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 3760; 
GFX940-TGSPLIT-NEXT: s_endpgm 3761; 3762; GFX11-WGP-LABEL: flat_system_acq_rel_monotonic_cmpxchg: 3763; GFX11-WGP: ; %bb.0: ; %entry 3764; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3765; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3766; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3767; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3768; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 3769; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 3770; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3771; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 3772; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 3773; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 3774; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3775; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3776; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 3777; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 3778; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3779; GFX11-WGP-NEXT: buffer_gl1_inv 3780; GFX11-WGP-NEXT: buffer_gl0_inv 3781; GFX11-WGP-NEXT: s_endpgm 3782; 3783; GFX11-CU-LABEL: flat_system_acq_rel_monotonic_cmpxchg: 3784; GFX11-CU: ; %bb.0: ; %entry 3785; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3786; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3787; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3788; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3789; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 3790; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 3791; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3792; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 3793; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 3794; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 3795; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3796; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 3797; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 3798; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 3799; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 3800; GFX11-CU-NEXT: buffer_gl1_inv 3801; GFX11-CU-NEXT: buffer_gl0_inv 3802; GFX11-CU-NEXT: s_endpgm 3803; 3804; GFX12-WGP-LABEL: 
flat_system_acq_rel_monotonic_cmpxchg: 3805; GFX12-WGP: ; %bb.0: ; %entry 3806; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3807; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 3808; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 3809; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 3810; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 3811; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 3812; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3813; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 3814; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 3815; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 3816; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 3817; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 3818; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 3819; GFX12-WGP-NEXT: s_wait_storecnt 0x0 3820; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 3821; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 3822; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 3823; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 3824; GFX12-WGP-NEXT: s_endpgm 3825; 3826; GFX12-CU-LABEL: flat_system_acq_rel_monotonic_cmpxchg: 3827; GFX12-CU: ; %bb.0: ; %entry 3828; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3829; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 3830; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 3831; GFX12-CU-NEXT: s_wait_kmcnt 0x0 3832; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 3833; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 3834; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3835; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 3836; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 3837; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 3838; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 3839; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 3840; GFX12-CU-NEXT: s_wait_samplecnt 0x0 3841; GFX12-CU-NEXT: s_wait_storecnt 0x0 3842; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 3843; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 3844; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 3845; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 3846; GFX12-CU-NEXT: 
s_endpgm 3847 ptr %out, i32 %in, i32 %old) { 3848entry: 3849 %gep = getelementptr i32, ptr %out, i32 4 3850 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel monotonic 3851 ret void 3852} 3853 3854define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg( 3855; GFX7-LABEL: flat_system_seq_cst_monotonic_cmpxchg: 3856; GFX7: ; %bb.0: ; %entry 3857; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 3858; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3859; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 3860; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 3861; GFX7-NEXT: s_mov_b64 s[10:11], 16 3862; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3863; GFX7-NEXT: s_mov_b32 s4, s8 3864; GFX7-NEXT: s_mov_b32 s5, s9 3865; GFX7-NEXT: s_mov_b32 s9, s10 3866; GFX7-NEXT: s_mov_b32 s8, s11 3867; GFX7-NEXT: s_add_u32 s4, s4, s9 3868; GFX7-NEXT: s_addc_u32 s8, s5, s8 3869; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3870; GFX7-NEXT: s_mov_b32 s5, s8 3871; GFX7-NEXT: v_mov_b32_e32 v2, s7 3872; GFX7-NEXT: v_mov_b32_e32 v0, s6 3873; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3874; GFX7-NEXT: v_mov_b32_e32 v3, v0 3875; GFX7-NEXT: v_mov_b32_e32 v0, s4 3876; GFX7-NEXT: v_mov_b32_e32 v1, s5 3877; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3878; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3879; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3880; GFX7-NEXT: buffer_wbinvl1_vol 3881; GFX7-NEXT: s_endpgm 3882; 3883; GFX10-WGP-LABEL: flat_system_seq_cst_monotonic_cmpxchg: 3884; GFX10-WGP: ; %bb.0: ; %entry 3885; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 3886; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3887; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 3888; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 3889; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 3890; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3891; GFX10-WGP-NEXT: s_mov_b32 s4, s8 3892; GFX10-WGP-NEXT: s_mov_b32 s5, s9 3893; GFX10-WGP-NEXT: s_mov_b32 s9, s10 3894; GFX10-WGP-NEXT: s_mov_b32 s8, s11 3895; GFX10-WGP-NEXT: 
s_add_u32 s4, s4, s9 3896; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 3897; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3898; GFX10-WGP-NEXT: s_mov_b32 s5, s8 3899; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 3900; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 3901; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3902; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 3903; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 3904; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 3905; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3906; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3907; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3908; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3909; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3910; GFX10-WGP-NEXT: buffer_gl1_inv 3911; GFX10-WGP-NEXT: buffer_gl0_inv 3912; GFX10-WGP-NEXT: s_endpgm 3913; 3914; GFX10-CU-LABEL: flat_system_seq_cst_monotonic_cmpxchg: 3915; GFX10-CU: ; %bb.0: ; %entry 3916; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 3917; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3918; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 3919; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 3920; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 3921; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3922; GFX10-CU-NEXT: s_mov_b32 s4, s8 3923; GFX10-CU-NEXT: s_mov_b32 s5, s9 3924; GFX10-CU-NEXT: s_mov_b32 s9, s10 3925; GFX10-CU-NEXT: s_mov_b32 s8, s11 3926; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 3927; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 3928; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 3929; GFX10-CU-NEXT: s_mov_b32 s5, s8 3930; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 3931; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 3932; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3933; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 3934; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 3935; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 3936; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3937; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3938; GFX10-CU-NEXT: flat_atomic_cmpswap 
v[0:1], v[2:3] 3939; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3940; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3941; GFX10-CU-NEXT: buffer_gl1_inv 3942; GFX10-CU-NEXT: buffer_gl0_inv 3943; GFX10-CU-NEXT: s_endpgm 3944; 3945; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_monotonic_cmpxchg: 3946; SKIP-CACHE-INV: ; %bb.0: ; %entry 3947; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 3948; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 3949; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 3950; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 3951; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 3952; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3953; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 3954; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 3955; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 3956; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 3957; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 3958; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 3959; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 3960; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 3961; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 3962; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 3963; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3964; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 3965; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3966; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3967; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3968; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3969; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3970; SKIP-CACHE-INV-NEXT: s_endpgm 3971; 3972; GFX90A-NOTTGSPLIT-LABEL: flat_system_seq_cst_monotonic_cmpxchg: 3973; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 3974; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3975; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3976; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3977; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3978; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3979; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 3980; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 3981; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 3982; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 3983; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 3984; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3985; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 3986; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3987; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 3988; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 3989; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 3990; 3991; GFX90A-TGSPLIT-LABEL: flat_system_seq_cst_monotonic_cmpxchg: 3992; GFX90A-TGSPLIT: ; %bb.0: ; %entry 3993; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 3994; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 3995; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 3996; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 3997; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 3998; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 3999; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4000; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4001; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 4002; GFX90A-TGSPLIT-NEXT: buffer_wbl2 4003; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4004; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 4005; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4006; GFX90A-TGSPLIT-NEXT: buffer_invl2 4007; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4008; GFX90A-TGSPLIT-NEXT: s_endpgm 4009; 4010; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_monotonic_cmpxchg: 4011; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4012; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4013; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4014; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4015; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4016; GFX940-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v2, s3 4017; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 4018; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4019; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4020; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 4021; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 4022; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4023; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 4024; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4025; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 4026; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4027; 4028; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_monotonic_cmpxchg: 4029; GFX940-TGSPLIT: ; %bb.0: ; %entry 4030; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4031; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4032; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4033; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4034; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4035; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 4036; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4037; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4038; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 4039; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 4040; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4041; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 4042; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4043; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 4044; GFX940-TGSPLIT-NEXT: s_endpgm 4045; 4046; GFX11-WGP-LABEL: flat_system_seq_cst_monotonic_cmpxchg: 4047; GFX11-WGP: ; %bb.0: ; %entry 4048; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4049; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4050; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4051; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4052; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 4053; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 4054; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 
killed $exec 4055; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 4056; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 4057; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 4058; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4059; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4060; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 4061; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4062; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4063; GFX11-WGP-NEXT: buffer_gl1_inv 4064; GFX11-WGP-NEXT: buffer_gl0_inv 4065; GFX11-WGP-NEXT: s_endpgm 4066; 4067; GFX11-CU-LABEL: flat_system_seq_cst_monotonic_cmpxchg: 4068; GFX11-CU: ; %bb.0: ; %entry 4069; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4070; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4071; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4072; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4073; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 4074; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 4075; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4076; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 4077; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 4078; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 4079; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4080; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4081; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 4082; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4083; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4084; GFX11-CU-NEXT: buffer_gl1_inv 4085; GFX11-CU-NEXT: buffer_gl0_inv 4086; GFX11-CU-NEXT: s_endpgm 4087; 4088; GFX12-WGP-LABEL: flat_system_seq_cst_monotonic_cmpxchg: 4089; GFX12-WGP: ; %bb.0: ; %entry 4090; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4091; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4092; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4093; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4094; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 4095; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 4096; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4097; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 4098; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 
s0 4099; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 4100; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 4101; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 4102; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 4103; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4104; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 4105; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 4106; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 4107; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 4108; GFX12-WGP-NEXT: s_endpgm 4109; 4110; GFX12-CU-LABEL: flat_system_seq_cst_monotonic_cmpxchg: 4111; GFX12-CU: ; %bb.0: ; %entry 4112; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4113; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4114; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4115; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4116; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 4117; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 4118; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4119; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 4120; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 4121; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 4122; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 4123; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 4124; GFX12-CU-NEXT: s_wait_samplecnt 0x0 4125; GFX12-CU-NEXT: s_wait_storecnt 0x0 4126; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 4127; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 4128; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 4129; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 4130; GFX12-CU-NEXT: s_endpgm 4131 ptr %out, i32 %in, i32 %old) { 4132entry: 4133 %gep = getelementptr i32, ptr %out, i32 4 4134 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst monotonic 4135 ret void 4136} 4137 4138define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg( 4139; GFX7-LABEL: flat_system_monotonic_acquire_cmpxchg: 4140; GFX7: ; %bb.0: ; %entry 4141; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4142; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4143; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4144; GFX7-NEXT: 
s_load_dword s6, s[4:5], 0x3 4145; GFX7-NEXT: s_mov_b64 s[10:11], 16 4146; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4147; GFX7-NEXT: s_mov_b32 s4, s8 4148; GFX7-NEXT: s_mov_b32 s5, s9 4149; GFX7-NEXT: s_mov_b32 s9, s10 4150; GFX7-NEXT: s_mov_b32 s8, s11 4151; GFX7-NEXT: s_add_u32 s4, s4, s9 4152; GFX7-NEXT: s_addc_u32 s8, s5, s8 4153; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4154; GFX7-NEXT: s_mov_b32 s5, s8 4155; GFX7-NEXT: v_mov_b32_e32 v2, s7 4156; GFX7-NEXT: v_mov_b32_e32 v0, s6 4157; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4158; GFX7-NEXT: v_mov_b32_e32 v3, v0 4159; GFX7-NEXT: v_mov_b32_e32 v0, s4 4160; GFX7-NEXT: v_mov_b32_e32 v1, s5 4161; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4162; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4163; GFX7-NEXT: buffer_wbinvl1_vol 4164; GFX7-NEXT: s_endpgm 4165; 4166; GFX10-WGP-LABEL: flat_system_monotonic_acquire_cmpxchg: 4167; GFX10-WGP: ; %bb.0: ; %entry 4168; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 4169; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4170; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 4171; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 4172; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 4173; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4174; GFX10-WGP-NEXT: s_mov_b32 s4, s8 4175; GFX10-WGP-NEXT: s_mov_b32 s5, s9 4176; GFX10-WGP-NEXT: s_mov_b32 s9, s10 4177; GFX10-WGP-NEXT: s_mov_b32 s8, s11 4178; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 4179; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 4180; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4181; GFX10-WGP-NEXT: s_mov_b32 s5, s8 4182; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 4183; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 4184; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4185; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 4186; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 4187; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 4188; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4189; GFX10-WGP-NEXT: s_waitcnt 
lgkmcnt(0) 4190; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4191; GFX10-WGP-NEXT: buffer_gl1_inv 4192; GFX10-WGP-NEXT: buffer_gl0_inv 4193; GFX10-WGP-NEXT: s_endpgm 4194; 4195; GFX10-CU-LABEL: flat_system_monotonic_acquire_cmpxchg: 4196; GFX10-CU: ; %bb.0: ; %entry 4197; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 4198; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4199; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 4200; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 4201; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 4202; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4203; GFX10-CU-NEXT: s_mov_b32 s4, s8 4204; GFX10-CU-NEXT: s_mov_b32 s5, s9 4205; GFX10-CU-NEXT: s_mov_b32 s9, s10 4206; GFX10-CU-NEXT: s_mov_b32 s8, s11 4207; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 4208; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 4209; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4210; GFX10-CU-NEXT: s_mov_b32 s5, s8 4211; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 4212; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 4213; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4214; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 4215; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 4216; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 4217; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4218; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4219; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4220; GFX10-CU-NEXT: buffer_gl1_inv 4221; GFX10-CU-NEXT: buffer_gl0_inv 4222; GFX10-CU-NEXT: s_endpgm 4223; 4224; SKIP-CACHE-INV-LABEL: flat_system_monotonic_acquire_cmpxchg: 4225; SKIP-CACHE-INV: ; %bb.0: ; %entry 4226; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 4227; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 4228; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 4229; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 4230; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 4231; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4232; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 4233; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 4234; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s5, s6 4235; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 4236; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 4237; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 4238; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 4239; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 4240; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4241; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4242; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4243; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 4244; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4245; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4246; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4247; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4248; SKIP-CACHE-INV-NEXT: s_endpgm 4249; 4250; GFX90A-NOTTGSPLIT-LABEL: flat_system_monotonic_acquire_cmpxchg: 4251; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4252; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4253; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4254; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4255; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4256; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4257; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 4258; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4259; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4260; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 4261; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 4262; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4263; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 4264; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4265; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4266; 4267; GFX90A-TGSPLIT-LABEL: flat_system_monotonic_acquire_cmpxchg: 4268; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4269; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4270; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4271; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 
4272; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4273; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4274; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 4275; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4276; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4277; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 4278; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 4279; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4280; GFX90A-TGSPLIT-NEXT: buffer_invl2 4281; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4282; GFX90A-TGSPLIT-NEXT: s_endpgm 4283; 4284; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_acquire_cmpxchg: 4285; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4286; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4287; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4288; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4289; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4290; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4291; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 4292; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4293; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4294; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 4295; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 4296; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4297; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 4298; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4299; 4300; GFX940-TGSPLIT-LABEL: flat_system_monotonic_acquire_cmpxchg: 4301; GFX940-TGSPLIT: ; %bb.0: ; %entry 4302; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4303; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4304; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4305; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4306; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4307; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 4308; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed 
$exec 4309; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4310; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 4311; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 4312; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4313; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 4314; GFX940-TGSPLIT-NEXT: s_endpgm 4315; 4316; GFX11-WGP-LABEL: flat_system_monotonic_acquire_cmpxchg: 4317; GFX11-WGP: ; %bb.0: ; %entry 4318; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4319; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4320; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4321; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4322; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 4323; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 4324; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4325; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 4326; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 4327; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 4328; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 4329; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4330; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4331; GFX11-WGP-NEXT: buffer_gl1_inv 4332; GFX11-WGP-NEXT: buffer_gl0_inv 4333; GFX11-WGP-NEXT: s_endpgm 4334; 4335; GFX11-CU-LABEL: flat_system_monotonic_acquire_cmpxchg: 4336; GFX11-CU: ; %bb.0: ; %entry 4337; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4338; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4339; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4340; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4341; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 4342; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 4343; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4344; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 4345; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 4346; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 4347; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 4348; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4349; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4350; GFX11-CU-NEXT: buffer_gl1_inv 4351; GFX11-CU-NEXT: buffer_gl0_inv 4352; 
GFX11-CU-NEXT: s_endpgm 4353; 4354; GFX12-WGP-LABEL: flat_system_monotonic_acquire_cmpxchg: 4355; GFX12-WGP: ; %bb.0: ; %entry 4356; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4357; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4358; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4359; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4360; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 4361; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 4362; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4363; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 4364; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 4365; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 4366; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 4367; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 4368; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 4369; GFX12-WGP-NEXT: s_endpgm 4370; 4371; GFX12-CU-LABEL: flat_system_monotonic_acquire_cmpxchg: 4372; GFX12-CU: ; %bb.0: ; %entry 4373; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4374; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4375; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4376; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4377; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 4378; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 4379; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4380; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 4381; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 4382; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 4383; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 4384; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 4385; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 4386; GFX12-CU-NEXT: s_endpgm 4387 ptr %out, i32 %in, i32 %old) { 4388entry: 4389 %gep = getelementptr i32, ptr %out, i32 4 4390 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic acquire 4391 ret void 4392} 4393 4394define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg( 4395; GFX7-LABEL: flat_system_acquire_acquire_cmpxchg: 4396; GFX7: ; %bb.0: ; %entry 4397; GFX7-NEXT: s_mov_b64 
s[4:5], s[8:9] 4398; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4399; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4400; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4401; GFX7-NEXT: s_mov_b64 s[10:11], 16 4402; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4403; GFX7-NEXT: s_mov_b32 s4, s8 4404; GFX7-NEXT: s_mov_b32 s5, s9 4405; GFX7-NEXT: s_mov_b32 s9, s10 4406; GFX7-NEXT: s_mov_b32 s8, s11 4407; GFX7-NEXT: s_add_u32 s4, s4, s9 4408; GFX7-NEXT: s_addc_u32 s8, s5, s8 4409; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4410; GFX7-NEXT: s_mov_b32 s5, s8 4411; GFX7-NEXT: v_mov_b32_e32 v2, s7 4412; GFX7-NEXT: v_mov_b32_e32 v0, s6 4413; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4414; GFX7-NEXT: v_mov_b32_e32 v3, v0 4415; GFX7-NEXT: v_mov_b32_e32 v0, s4 4416; GFX7-NEXT: v_mov_b32_e32 v1, s5 4417; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4418; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4419; GFX7-NEXT: buffer_wbinvl1_vol 4420; GFX7-NEXT: s_endpgm 4421; 4422; GFX10-WGP-LABEL: flat_system_acquire_acquire_cmpxchg: 4423; GFX10-WGP: ; %bb.0: ; %entry 4424; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 4425; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4426; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 4427; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 4428; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 4429; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4430; GFX10-WGP-NEXT: s_mov_b32 s4, s8 4431; GFX10-WGP-NEXT: s_mov_b32 s5, s9 4432; GFX10-WGP-NEXT: s_mov_b32 s9, s10 4433; GFX10-WGP-NEXT: s_mov_b32 s8, s11 4434; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 4435; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 4436; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4437; GFX10-WGP-NEXT: s_mov_b32 s5, s8 4438; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 4439; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 4440; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4441; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 4442; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 4443; 
GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 4444; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4445; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4446; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4447; GFX10-WGP-NEXT: buffer_gl1_inv 4448; GFX10-WGP-NEXT: buffer_gl0_inv 4449; GFX10-WGP-NEXT: s_endpgm 4450; 4451; GFX10-CU-LABEL: flat_system_acquire_acquire_cmpxchg: 4452; GFX10-CU: ; %bb.0: ; %entry 4453; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 4454; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4455; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 4456; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 4457; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 4458; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4459; GFX10-CU-NEXT: s_mov_b32 s4, s8 4460; GFX10-CU-NEXT: s_mov_b32 s5, s9 4461; GFX10-CU-NEXT: s_mov_b32 s9, s10 4462; GFX10-CU-NEXT: s_mov_b32 s8, s11 4463; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 4464; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 4465; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4466; GFX10-CU-NEXT: s_mov_b32 s5, s8 4467; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 4468; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 4469; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4470; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 4471; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 4472; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 4473; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4474; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4475; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4476; GFX10-CU-NEXT: buffer_gl1_inv 4477; GFX10-CU-NEXT: buffer_gl0_inv 4478; GFX10-CU-NEXT: s_endpgm 4479; 4480; SKIP-CACHE-INV-LABEL: flat_system_acquire_acquire_cmpxchg: 4481; SKIP-CACHE-INV: ; %bb.0: ; %entry 4482; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 4483; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 4484; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 4485; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 4486; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 4487; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 
4488; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 4489; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 4490; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 4491; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 4492; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 4493; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 4494; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 4495; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 4496; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4497; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4498; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4499; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 4500; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4501; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4502; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4503; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4504; SKIP-CACHE-INV-NEXT: s_endpgm 4505; 4506; GFX90A-NOTTGSPLIT-LABEL: flat_system_acquire_acquire_cmpxchg: 4507; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4508; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4509; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4510; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4511; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4512; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4513; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 4514; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4515; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4516; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 4517; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 4518; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4519; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 4520; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4521; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4522; 4523; GFX90A-TGSPLIT-LABEL: flat_system_acquire_acquire_cmpxchg: 4524; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4525; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4526; 
GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4527; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4528; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4529; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4530; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 4531; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4532; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4533; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 4534; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 4535; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4536; GFX90A-TGSPLIT-NEXT: buffer_invl2 4537; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4538; GFX90A-TGSPLIT-NEXT: s_endpgm 4539; 4540; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_acquire_cmpxchg: 4541; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4542; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4543; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4544; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4545; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4546; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4547; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 4548; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4549; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4550; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 4551; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 4552; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4553; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 4554; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4555; 4556; GFX940-TGSPLIT-LABEL: flat_system_acquire_acquire_cmpxchg: 4557; GFX940-TGSPLIT: ; %bb.0: ; %entry 4558; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4559; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4560; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4561; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4562; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4563; GFX940-TGSPLIT-NEXT: 
v_mov_b32_e32 v0, s2 4564; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4565; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4566; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 4567; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 4568; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4569; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 4570; GFX940-TGSPLIT-NEXT: s_endpgm 4571; 4572; GFX11-WGP-LABEL: flat_system_acquire_acquire_cmpxchg: 4573; GFX11-WGP: ; %bb.0: ; %entry 4574; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4575; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4576; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4577; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4578; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 4579; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 4580; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4581; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 4582; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 4583; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 4584; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 4585; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4586; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4587; GFX11-WGP-NEXT: buffer_gl1_inv 4588; GFX11-WGP-NEXT: buffer_gl0_inv 4589; GFX11-WGP-NEXT: s_endpgm 4590; 4591; GFX11-CU-LABEL: flat_system_acquire_acquire_cmpxchg: 4592; GFX11-CU: ; %bb.0: ; %entry 4593; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4594; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4595; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4596; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4597; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 4598; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 4599; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4600; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 4601; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 4602; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 4603; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 4604; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4605; GFX11-CU-NEXT: 
s_waitcnt_vscnt null, 0x0 4606; GFX11-CU-NEXT: buffer_gl1_inv 4607; GFX11-CU-NEXT: buffer_gl0_inv 4608; GFX11-CU-NEXT: s_endpgm 4609; 4610; GFX12-WGP-LABEL: flat_system_acquire_acquire_cmpxchg: 4611; GFX12-WGP: ; %bb.0: ; %entry 4612; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4613; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4614; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4615; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4616; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 4617; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 4618; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4619; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 4620; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 4621; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 4622; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 4623; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 4624; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 4625; GFX12-WGP-NEXT: s_endpgm 4626; 4627; GFX12-CU-LABEL: flat_system_acquire_acquire_cmpxchg: 4628; GFX12-CU: ; %bb.0: ; %entry 4629; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4630; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4631; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4632; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4633; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 4634; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 4635; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4636; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 4637; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 4638; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 4639; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 4640; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 4641; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 4642; GFX12-CU-NEXT: s_endpgm 4643 ptr %out, i32 %in, i32 %old) { 4644entry: 4645 %gep = getelementptr i32, ptr %out, i32 4 4646 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire acquire 4647 ret void 4648} 4649 4650define amdgpu_kernel void @flat_system_release_acquire_cmpxchg( 4651; 
GFX7-LABEL: flat_system_release_acquire_cmpxchg: 4652; GFX7: ; %bb.0: ; %entry 4653; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4654; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4655; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4656; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4657; GFX7-NEXT: s_mov_b64 s[10:11], 16 4658; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4659; GFX7-NEXT: s_mov_b32 s4, s8 4660; GFX7-NEXT: s_mov_b32 s5, s9 4661; GFX7-NEXT: s_mov_b32 s9, s10 4662; GFX7-NEXT: s_mov_b32 s8, s11 4663; GFX7-NEXT: s_add_u32 s4, s4, s9 4664; GFX7-NEXT: s_addc_u32 s8, s5, s8 4665; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4666; GFX7-NEXT: s_mov_b32 s5, s8 4667; GFX7-NEXT: v_mov_b32_e32 v2, s7 4668; GFX7-NEXT: v_mov_b32_e32 v0, s6 4669; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4670; GFX7-NEXT: v_mov_b32_e32 v3, v0 4671; GFX7-NEXT: v_mov_b32_e32 v0, s4 4672; GFX7-NEXT: v_mov_b32_e32 v1, s5 4673; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4674; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4675; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4676; GFX7-NEXT: buffer_wbinvl1_vol 4677; GFX7-NEXT: s_endpgm 4678; 4679; GFX10-WGP-LABEL: flat_system_release_acquire_cmpxchg: 4680; GFX10-WGP: ; %bb.0: ; %entry 4681; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 4682; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4683; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 4684; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 4685; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 4686; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4687; GFX10-WGP-NEXT: s_mov_b32 s4, s8 4688; GFX10-WGP-NEXT: s_mov_b32 s5, s9 4689; GFX10-WGP-NEXT: s_mov_b32 s9, s10 4690; GFX10-WGP-NEXT: s_mov_b32 s8, s11 4691; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 4692; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 4693; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4694; GFX10-WGP-NEXT: s_mov_b32 s5, s8 4695; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 4696; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 4697; GFX10-WGP-NEXT: ; 
kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4698; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 4699; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 4700; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 4701; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4702; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4703; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4704; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4705; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4706; GFX10-WGP-NEXT: buffer_gl1_inv 4707; GFX10-WGP-NEXT: buffer_gl0_inv 4708; GFX10-WGP-NEXT: s_endpgm 4709; 4710; GFX10-CU-LABEL: flat_system_release_acquire_cmpxchg: 4711; GFX10-CU: ; %bb.0: ; %entry 4712; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 4713; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4714; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 4715; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 4716; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 4717; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4718; GFX10-CU-NEXT: s_mov_b32 s4, s8 4719; GFX10-CU-NEXT: s_mov_b32 s5, s9 4720; GFX10-CU-NEXT: s_mov_b32 s9, s10 4721; GFX10-CU-NEXT: s_mov_b32 s8, s11 4722; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 4723; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 4724; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4725; GFX10-CU-NEXT: s_mov_b32 s5, s8 4726; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 4727; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 4728; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4729; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 4730; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 4731; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 4732; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4733; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4734; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4735; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4736; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4737; GFX10-CU-NEXT: buffer_gl1_inv 4738; GFX10-CU-NEXT: buffer_gl0_inv 4739; GFX10-CU-NEXT: s_endpgm 4740; 4741; SKIP-CACHE-INV-LABEL: flat_system_release_acquire_cmpxchg: 4742; 
SKIP-CACHE-INV: ; %bb.0: ; %entry 4743; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 4744; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 4745; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 4746; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 4747; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 4748; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4749; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 4750; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 4751; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 4752; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 4753; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 4754; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 4755; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 4756; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 4757; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4758; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4759; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4760; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 4761; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4762; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4763; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4764; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4765; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4766; SKIP-CACHE-INV-NEXT: s_endpgm 4767; 4768; GFX90A-NOTTGSPLIT-LABEL: flat_system_release_acquire_cmpxchg: 4769; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 4770; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4771; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4772; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4773; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4774; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4775; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 4776; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4777; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4778; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 4779; GFX90A-NOTTGSPLIT-NEXT: 
buffer_wbl2 4780; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4781; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 4782; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4783; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 4784; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 4785; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 4786; 4787; GFX90A-TGSPLIT-LABEL: flat_system_release_acquire_cmpxchg: 4788; GFX90A-TGSPLIT: ; %bb.0: ; %entry 4789; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 4790; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 4791; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 4792; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4793; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 4794; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 4795; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4796; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4797; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 4798; GFX90A-TGSPLIT-NEXT: buffer_wbl2 4799; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4800; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 4801; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4802; GFX90A-TGSPLIT-NEXT: buffer_invl2 4803; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 4804; GFX90A-TGSPLIT-NEXT: s_endpgm 4805; 4806; GFX940-NOTTGSPLIT-LABEL: flat_system_release_acquire_cmpxchg: 4807; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 4808; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4809; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4810; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4811; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4812; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4813; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 4814; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4815; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4816; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 4817; 
GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 4818; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4819; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 4820; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4821; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 4822; GFX940-NOTTGSPLIT-NEXT: s_endpgm 4823; 4824; GFX940-TGSPLIT-LABEL: flat_system_release_acquire_cmpxchg: 4825; GFX940-TGSPLIT: ; %bb.0: ; %entry 4826; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4827; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 4828; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 4829; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 4830; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 4831; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 4832; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4833; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 4834; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 4835; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 4836; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4837; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 4838; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 4839; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 4840; GFX940-TGSPLIT-NEXT: s_endpgm 4841; 4842; GFX11-WGP-LABEL: flat_system_release_acquire_cmpxchg: 4843; GFX11-WGP: ; %bb.0: ; %entry 4844; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4845; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4846; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4847; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4848; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 4849; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 4850; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4851; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 4852; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 4853; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 4854; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4855; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4856; GFX11-WGP-NEXT: 
flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 4857; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 4858; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4859; GFX11-WGP-NEXT: buffer_gl1_inv 4860; GFX11-WGP-NEXT: buffer_gl0_inv 4861; GFX11-WGP-NEXT: s_endpgm 4862; 4863; GFX11-CU-LABEL: flat_system_release_acquire_cmpxchg: 4864; GFX11-CU: ; %bb.0: ; %entry 4865; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4866; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4867; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4868; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4869; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 4870; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 4871; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4872; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 4873; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 4874; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 4875; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4876; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4877; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 4878; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 4879; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 4880; GFX11-CU-NEXT: buffer_gl1_inv 4881; GFX11-CU-NEXT: buffer_gl0_inv 4882; GFX11-CU-NEXT: s_endpgm 4883; 4884; GFX12-WGP-LABEL: flat_system_release_acquire_cmpxchg: 4885; GFX12-WGP: ; %bb.0: ; %entry 4886; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4887; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 4888; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 4889; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 4890; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 4891; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 4892; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4893; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 4894; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 4895; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 4896; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 4897; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 4898; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 4899; GFX12-WGP-NEXT: s_wait_storecnt 0x0 4900; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 
4901; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 4902; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 4903; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 4904; GFX12-WGP-NEXT: s_endpgm 4905; 4906; GFX12-CU-LABEL: flat_system_release_acquire_cmpxchg: 4907; GFX12-CU: ; %bb.0: ; %entry 4908; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4909; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 4910; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 4911; GFX12-CU-NEXT: s_wait_kmcnt 0x0 4912; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 4913; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 4914; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4915; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 4916; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 4917; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 4918; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 4919; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 4920; GFX12-CU-NEXT: s_wait_samplecnt 0x0 4921; GFX12-CU-NEXT: s_wait_storecnt 0x0 4922; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 4923; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 4924; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 4925; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 4926; GFX12-CU-NEXT: s_endpgm 4927 ptr %out, i32 %in, i32 %old) { 4928entry: 4929 %gep = getelementptr i32, ptr %out, i32 4 4930 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release acquire 4931 ret void 4932} 4933 4934define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg( 4935; GFX7-LABEL: flat_system_acq_rel_acquire_cmpxchg: 4936; GFX7: ; %bb.0: ; %entry 4937; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 4938; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4939; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 4940; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 4941; GFX7-NEXT: s_mov_b64 s[10:11], 16 4942; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4943; GFX7-NEXT: s_mov_b32 s4, s8 4944; GFX7-NEXT: s_mov_b32 s5, s9 4945; GFX7-NEXT: s_mov_b32 s9, s10 4946; GFX7-NEXT: s_mov_b32 s8, s11 4947; GFX7-NEXT: s_add_u32 s4, 
s4, s9 4948; GFX7-NEXT: s_addc_u32 s8, s5, s8 4949; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4950; GFX7-NEXT: s_mov_b32 s5, s8 4951; GFX7-NEXT: v_mov_b32_e32 v2, s7 4952; GFX7-NEXT: v_mov_b32_e32 v0, s6 4953; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4954; GFX7-NEXT: v_mov_b32_e32 v3, v0 4955; GFX7-NEXT: v_mov_b32_e32 v0, s4 4956; GFX7-NEXT: v_mov_b32_e32 v1, s5 4957; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4958; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4959; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4960; GFX7-NEXT: buffer_wbinvl1_vol 4961; GFX7-NEXT: s_endpgm 4962; 4963; GFX10-WGP-LABEL: flat_system_acq_rel_acquire_cmpxchg: 4964; GFX10-WGP: ; %bb.0: ; %entry 4965; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 4966; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4967; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 4968; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 4969; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 4970; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4971; GFX10-WGP-NEXT: s_mov_b32 s4, s8 4972; GFX10-WGP-NEXT: s_mov_b32 s5, s9 4973; GFX10-WGP-NEXT: s_mov_b32 s9, s10 4974; GFX10-WGP-NEXT: s_mov_b32 s8, s11 4975; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 4976; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 4977; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 4978; GFX10-WGP-NEXT: s_mov_b32 s5, s8 4979; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 4980; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 4981; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 4982; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 4983; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 4984; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 4985; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 4986; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4987; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4988; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4989; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4990; GFX10-WGP-NEXT: buffer_gl1_inv 4991; GFX10-WGP-NEXT: buffer_gl0_inv 
4992; GFX10-WGP-NEXT: s_endpgm 4993; 4994; GFX10-CU-LABEL: flat_system_acq_rel_acquire_cmpxchg: 4995; GFX10-CU: ; %bb.0: ; %entry 4996; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 4997; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4998; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 4999; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 5000; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 5001; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5002; GFX10-CU-NEXT: s_mov_b32 s4, s8 5003; GFX10-CU-NEXT: s_mov_b32 s5, s9 5004; GFX10-CU-NEXT: s_mov_b32 s9, s10 5005; GFX10-CU-NEXT: s_mov_b32 s8, s11 5006; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 5007; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 5008; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5009; GFX10-CU-NEXT: s_mov_b32 s5, s8 5010; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 5011; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 5012; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5013; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 5014; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 5015; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 5016; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5017; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5018; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5019; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5020; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5021; GFX10-CU-NEXT: buffer_gl1_inv 5022; GFX10-CU-NEXT: buffer_gl0_inv 5023; GFX10-CU-NEXT: s_endpgm 5024; 5025; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_acquire_cmpxchg: 5026; SKIP-CACHE-INV: ; %bb.0: ; %entry 5027; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 5028; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 5029; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 5030; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 5031; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 5032; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5033; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 5034; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 5035; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 5036; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s4, s7 5037; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 5038; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 5039; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 5040; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 5041; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 5042; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 5043; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5044; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 5045; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5046; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5047; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5048; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5049; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5050; SKIP-CACHE-INV-NEXT: s_endpgm 5051; 5052; GFX90A-NOTTGSPLIT-LABEL: flat_system_acq_rel_acquire_cmpxchg: 5053; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5054; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5055; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5056; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5057; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5058; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5059; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 5060; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5061; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5062; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 5063; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 5064; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5065; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 5066; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5067; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5068; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5069; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5070; 5071; GFX90A-TGSPLIT-LABEL: flat_system_acq_rel_acquire_cmpxchg: 5072; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5073; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5074; 
GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5075; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5076; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5077; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5078; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 5079; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5080; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5081; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 5082; GFX90A-TGSPLIT-NEXT: buffer_wbl2 5083; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5084; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 5085; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5086; GFX90A-TGSPLIT-NEXT: buffer_invl2 5087; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5088; GFX90A-TGSPLIT-NEXT: s_endpgm 5089; 5090; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_acquire_cmpxchg: 5091; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5092; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5093; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5094; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5095; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5096; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5097; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 5098; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5099; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5100; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 5101; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5102; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5103; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 5104; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5105; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 5106; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5107; 5108; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_acquire_cmpxchg: 5109; GFX940-TGSPLIT: ; %bb.0: ; %entry 5110; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5111; GFX940-TGSPLIT-NEXT: 
s_load_dword s3, s[4:5], 0x8 5112; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5113; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5114; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5115; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 5116; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5117; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5118; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 5119; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5120; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5121; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 5122; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5123; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 5124; GFX940-TGSPLIT-NEXT: s_endpgm 5125; 5126; GFX11-WGP-LABEL: flat_system_acq_rel_acquire_cmpxchg: 5127; GFX11-WGP: ; %bb.0: ; %entry 5128; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5129; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5130; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5131; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5132; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 5133; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 5134; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5135; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 5136; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 5137; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 5138; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5139; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5140; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 5141; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5142; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5143; GFX11-WGP-NEXT: buffer_gl1_inv 5144; GFX11-WGP-NEXT: buffer_gl0_inv 5145; GFX11-WGP-NEXT: s_endpgm 5146; 5147; GFX11-CU-LABEL: flat_system_acq_rel_acquire_cmpxchg: 5148; GFX11-CU: ; %bb.0: ; %entry 5149; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5150; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5151; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5152; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5153; 
GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 5154; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 5155; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5156; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 5157; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 5158; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 5159; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5160; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5161; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 5162; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5163; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5164; GFX11-CU-NEXT: buffer_gl1_inv 5165; GFX11-CU-NEXT: buffer_gl0_inv 5166; GFX11-CU-NEXT: s_endpgm 5167; 5168; GFX12-WGP-LABEL: flat_system_acq_rel_acquire_cmpxchg: 5169; GFX12-WGP: ; %bb.0: ; %entry 5170; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5171; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5172; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5173; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5174; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 5175; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 5176; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5177; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 5178; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 5179; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 5180; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 5181; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 5182; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 5183; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5184; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 5185; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 5186; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 5187; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 5188; GFX12-WGP-NEXT: s_endpgm 5189; 5190; GFX12-CU-LABEL: flat_system_acq_rel_acquire_cmpxchg: 5191; GFX12-CU: ; %bb.0: ; %entry 5192; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5193; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5194; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5195; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5196; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 
5197; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 5198; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5199; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 5200; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 5201; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 5202; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 5203; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 5204; GFX12-CU-NEXT: s_wait_samplecnt 0x0 5205; GFX12-CU-NEXT: s_wait_storecnt 0x0 5206; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 5207; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 5208; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 5209; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 5210; GFX12-CU-NEXT: s_endpgm 5211 ptr %out, i32 %in, i32 %old) { 5212entry: 5213 %gep = getelementptr i32, ptr %out, i32 4 5214 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel acquire 5215 ret void 5216} 5217 5218define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg( 5219; GFX7-LABEL: flat_system_seq_cst_acquire_cmpxchg: 5220; GFX7: ; %bb.0: ; %entry 5221; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5222; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5223; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5224; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5225; GFX7-NEXT: s_mov_b64 s[10:11], 16 5226; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5227; GFX7-NEXT: s_mov_b32 s4, s8 5228; GFX7-NEXT: s_mov_b32 s5, s9 5229; GFX7-NEXT: s_mov_b32 s9, s10 5230; GFX7-NEXT: s_mov_b32 s8, s11 5231; GFX7-NEXT: s_add_u32 s4, s4, s9 5232; GFX7-NEXT: s_addc_u32 s8, s5, s8 5233; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5234; GFX7-NEXT: s_mov_b32 s5, s8 5235; GFX7-NEXT: v_mov_b32_e32 v2, s7 5236; GFX7-NEXT: v_mov_b32_e32 v0, s6 5237; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5238; GFX7-NEXT: v_mov_b32_e32 v3, v0 5239; GFX7-NEXT: v_mov_b32_e32 v0, s4 5240; GFX7-NEXT: v_mov_b32_e32 v1, s5 5241; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5242; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5243; GFX7-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) 5244; GFX7-NEXT: buffer_wbinvl1_vol 5245; GFX7-NEXT: s_endpgm 5246; 5247; GFX10-WGP-LABEL: flat_system_seq_cst_acquire_cmpxchg: 5248; GFX10-WGP: ; %bb.0: ; %entry 5249; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 5250; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5251; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 5252; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 5253; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 5254; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5255; GFX10-WGP-NEXT: s_mov_b32 s4, s8 5256; GFX10-WGP-NEXT: s_mov_b32 s5, s9 5257; GFX10-WGP-NEXT: s_mov_b32 s9, s10 5258; GFX10-WGP-NEXT: s_mov_b32 s8, s11 5259; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 5260; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 5261; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5262; GFX10-WGP-NEXT: s_mov_b32 s5, s8 5263; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 5264; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 5265; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5266; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 5267; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 5268; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 5269; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5270; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5271; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5272; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5273; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5274; GFX10-WGP-NEXT: buffer_gl1_inv 5275; GFX10-WGP-NEXT: buffer_gl0_inv 5276; GFX10-WGP-NEXT: s_endpgm 5277; 5278; GFX10-CU-LABEL: flat_system_seq_cst_acquire_cmpxchg: 5279; GFX10-CU: ; %bb.0: ; %entry 5280; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 5281; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5282; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 5283; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 5284; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 5285; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5286; GFX10-CU-NEXT: s_mov_b32 s4, s8 5287; GFX10-CU-NEXT: s_mov_b32 s5, s9 5288; GFX10-CU-NEXT: s_mov_b32 s9, s10 5289; 
GFX10-CU-NEXT: s_mov_b32 s8, s11 5290; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 5291; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 5292; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5293; GFX10-CU-NEXT: s_mov_b32 s5, s8 5294; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 5295; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 5296; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5297; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 5298; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 5299; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 5300; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5301; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5302; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5303; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5304; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5305; GFX10-CU-NEXT: buffer_gl1_inv 5306; GFX10-CU-NEXT: buffer_gl0_inv 5307; GFX10-CU-NEXT: s_endpgm 5308; 5309; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_acquire_cmpxchg: 5310; SKIP-CACHE-INV: ; %bb.0: ; %entry 5311; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 5312; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 5313; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 5314; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 5315; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 5316; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5317; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 5318; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 5319; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 5320; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 5321; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 5322; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 5323; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 5324; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 5325; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 5326; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 5327; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5328; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 5329; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5330; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5331; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5332; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5333; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5334; SKIP-CACHE-INV-NEXT: s_endpgm 5335; 5336; GFX90A-NOTTGSPLIT-LABEL: flat_system_seq_cst_acquire_cmpxchg: 5337; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5338; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5339; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5340; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5341; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5342; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5343; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 5344; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5345; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5346; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 5347; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 5348; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5349; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 5350; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5351; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5352; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5353; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5354; 5355; GFX90A-TGSPLIT-LABEL: flat_system_seq_cst_acquire_cmpxchg: 5356; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5357; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5358; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5359; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5360; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5361; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5362; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 5363; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5364; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5365; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 5366; GFX90A-TGSPLIT-NEXT: buffer_wbl2 5367; 
GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5368; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 5369; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5370; GFX90A-TGSPLIT-NEXT: buffer_invl2 5371; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5372; GFX90A-TGSPLIT-NEXT: s_endpgm 5373; 5374; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_acquire_cmpxchg: 5375; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5376; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5377; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5378; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5379; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5380; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5381; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 5382; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5383; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5384; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 5385; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5386; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5387; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 5388; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5389; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 5390; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5391; 5392; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_acquire_cmpxchg: 5393; GFX940-TGSPLIT: ; %bb.0: ; %entry 5394; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5395; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5396; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5397; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5398; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5399; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 5400; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5401; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5402; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 5403; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5404; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 5405; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 5406; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5407; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 5408; GFX940-TGSPLIT-NEXT: s_endpgm 5409; 5410; GFX11-WGP-LABEL: flat_system_seq_cst_acquire_cmpxchg: 5411; GFX11-WGP: ; %bb.0: ; %entry 5412; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5413; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5414; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5415; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5416; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 5417; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 5418; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5419; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 5420; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 5421; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 5422; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5423; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5424; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 5425; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5426; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5427; GFX11-WGP-NEXT: buffer_gl1_inv 5428; GFX11-WGP-NEXT: buffer_gl0_inv 5429; GFX11-WGP-NEXT: s_endpgm 5430; 5431; GFX11-CU-LABEL: flat_system_seq_cst_acquire_cmpxchg: 5432; GFX11-CU: ; %bb.0: ; %entry 5433; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5434; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5435; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5436; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5437; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 5438; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 5439; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5440; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 5441; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 5442; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 5443; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5444; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5445; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 5446; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5447; GFX11-CU-NEXT: 
s_waitcnt_vscnt null, 0x0 5448; GFX11-CU-NEXT: buffer_gl1_inv 5449; GFX11-CU-NEXT: buffer_gl0_inv 5450; GFX11-CU-NEXT: s_endpgm 5451; 5452; GFX12-WGP-LABEL: flat_system_seq_cst_acquire_cmpxchg: 5453; GFX12-WGP: ; %bb.0: ; %entry 5454; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5455; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5456; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5457; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5458; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 5459; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 5460; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5461; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 5462; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 5463; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 5464; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 5465; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 5466; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 5467; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5468; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 5469; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 5470; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 5471; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 5472; GFX12-WGP-NEXT: s_endpgm 5473; 5474; GFX12-CU-LABEL: flat_system_seq_cst_acquire_cmpxchg: 5475; GFX12-CU: ; %bb.0: ; %entry 5476; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5477; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5478; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5479; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5480; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 5481; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 5482; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5483; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 5484; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 5485; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 5486; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 5487; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 5488; GFX12-CU-NEXT: s_wait_samplecnt 0x0 5489; GFX12-CU-NEXT: s_wait_storecnt 0x0 5490; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 5491; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 
v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 5492; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 5493; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 5494; GFX12-CU-NEXT: s_endpgm 5495 ptr %out, i32 %in, i32 %old) { 5496entry: 5497 %gep = getelementptr i32, ptr %out, i32 4 5498 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst acquire 5499 ret void 5500} 5501 5502define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg( 5503; GFX7-LABEL: flat_system_monotonic_seq_cst_cmpxchg: 5504; GFX7: ; %bb.0: ; %entry 5505; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5506; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5507; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5508; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5509; GFX7-NEXT: s_mov_b64 s[10:11], 16 5510; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5511; GFX7-NEXT: s_mov_b32 s4, s8 5512; GFX7-NEXT: s_mov_b32 s5, s9 5513; GFX7-NEXT: s_mov_b32 s9, s10 5514; GFX7-NEXT: s_mov_b32 s8, s11 5515; GFX7-NEXT: s_add_u32 s4, s4, s9 5516; GFX7-NEXT: s_addc_u32 s8, s5, s8 5517; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5518; GFX7-NEXT: s_mov_b32 s5, s8 5519; GFX7-NEXT: v_mov_b32_e32 v2, s7 5520; GFX7-NEXT: v_mov_b32_e32 v0, s6 5521; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5522; GFX7-NEXT: v_mov_b32_e32 v3, v0 5523; GFX7-NEXT: v_mov_b32_e32 v0, s4 5524; GFX7-NEXT: v_mov_b32_e32 v1, s5 5525; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5526; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5527; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5528; GFX7-NEXT: buffer_wbinvl1_vol 5529; GFX7-NEXT: s_endpgm 5530; 5531; GFX10-WGP-LABEL: flat_system_monotonic_seq_cst_cmpxchg: 5532; GFX10-WGP: ; %bb.0: ; %entry 5533; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 5534; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5535; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 5536; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 5537; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 5538; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5539; GFX10-WGP-NEXT: 
s_mov_b32 s4, s8 5540; GFX10-WGP-NEXT: s_mov_b32 s5, s9 5541; GFX10-WGP-NEXT: s_mov_b32 s9, s10 5542; GFX10-WGP-NEXT: s_mov_b32 s8, s11 5543; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 5544; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 5545; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5546; GFX10-WGP-NEXT: s_mov_b32 s5, s8 5547; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 5548; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 5549; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5550; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 5551; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 5552; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 5553; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5554; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5555; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5556; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5557; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5558; GFX10-WGP-NEXT: buffer_gl1_inv 5559; GFX10-WGP-NEXT: buffer_gl0_inv 5560; GFX10-WGP-NEXT: s_endpgm 5561; 5562; GFX10-CU-LABEL: flat_system_monotonic_seq_cst_cmpxchg: 5563; GFX10-CU: ; %bb.0: ; %entry 5564; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 5565; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5566; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 5567; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 5568; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 5569; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5570; GFX10-CU-NEXT: s_mov_b32 s4, s8 5571; GFX10-CU-NEXT: s_mov_b32 s5, s9 5572; GFX10-CU-NEXT: s_mov_b32 s9, s10 5573; GFX10-CU-NEXT: s_mov_b32 s8, s11 5574; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 5575; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 5576; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5577; GFX10-CU-NEXT: s_mov_b32 s5, s8 5578; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 5579; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 5580; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5581; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 5582; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 5583; GFX10-CU-NEXT: 
v_mov_b32_e32 v1, s5 5584; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5585; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5586; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5587; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5588; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5589; GFX10-CU-NEXT: buffer_gl1_inv 5590; GFX10-CU-NEXT: buffer_gl0_inv 5591; GFX10-CU-NEXT: s_endpgm 5592; 5593; SKIP-CACHE-INV-LABEL: flat_system_monotonic_seq_cst_cmpxchg: 5594; SKIP-CACHE-INV: ; %bb.0: ; %entry 5595; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 5596; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 5597; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 5598; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 5599; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 5600; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5601; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 5602; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 5603; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 5604; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 5605; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 5606; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 5607; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 5608; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 5609; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 5610; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 5611; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5612; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 5613; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5614; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5615; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5616; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5617; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5618; SKIP-CACHE-INV-NEXT: s_endpgm 5619; 5620; GFX90A-NOTTGSPLIT-LABEL: flat_system_monotonic_seq_cst_cmpxchg: 5621; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5622; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5623; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5624; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5625; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5626; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5627; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 5628; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5629; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5630; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 5631; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 5632; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5633; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 5634; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5635; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5636; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5637; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5638; 5639; GFX90A-TGSPLIT-LABEL: flat_system_monotonic_seq_cst_cmpxchg: 5640; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5641; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5642; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5643; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5644; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5645; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5646; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 5647; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5648; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5649; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 5650; GFX90A-TGSPLIT-NEXT: buffer_wbl2 5651; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5652; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 5653; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5654; GFX90A-TGSPLIT-NEXT: buffer_invl2 5655; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5656; GFX90A-TGSPLIT-NEXT: s_endpgm 5657; 5658; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_seq_cst_cmpxchg: 5659; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5660; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5661; GFX940-NOTTGSPLIT-NEXT: 
s_load_dword s3, s[4:5], 0x8 5662; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5663; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5664; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5665; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 5666; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5667; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5668; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 5669; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5670; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5671; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 5672; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5673; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 5674; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5675; 5676; GFX940-TGSPLIT-LABEL: flat_system_monotonic_seq_cst_cmpxchg: 5677; GFX940-TGSPLIT: ; %bb.0: ; %entry 5678; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5679; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5680; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5681; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5682; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5683; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 5684; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5685; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5686; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 5687; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5688; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5689; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 5690; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5691; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 5692; GFX940-TGSPLIT-NEXT: s_endpgm 5693; 5694; GFX11-WGP-LABEL: flat_system_monotonic_seq_cst_cmpxchg: 5695; GFX11-WGP: ; %bb.0: ; %entry 5696; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5697; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5698; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5699; GFX11-WGP-NEXT: s_waitcnt 
lgkmcnt(0) 5700; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 5701; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 5702; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5703; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 5704; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 5705; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 5706; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5707; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5708; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 5709; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5710; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5711; GFX11-WGP-NEXT: buffer_gl1_inv 5712; GFX11-WGP-NEXT: buffer_gl0_inv 5713; GFX11-WGP-NEXT: s_endpgm 5714; 5715; GFX11-CU-LABEL: flat_system_monotonic_seq_cst_cmpxchg: 5716; GFX11-CU: ; %bb.0: ; %entry 5717; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5718; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5719; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5720; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5721; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 5722; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 5723; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5724; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 5725; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 5726; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 5727; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5728; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5729; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 5730; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 5731; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 5732; GFX11-CU-NEXT: buffer_gl1_inv 5733; GFX11-CU-NEXT: buffer_gl0_inv 5734; GFX11-CU-NEXT: s_endpgm 5735; 5736; GFX12-WGP-LABEL: flat_system_monotonic_seq_cst_cmpxchg: 5737; GFX12-WGP: ; %bb.0: ; %entry 5738; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5739; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5740; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5741; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 5742; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 5743; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 
s2 5744; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5745; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 5746; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 5747; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 5748; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 5749; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 5750; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 5751; GFX12-WGP-NEXT: s_wait_storecnt 0x0 5752; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 5753; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 5754; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 5755; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 5756; GFX12-WGP-NEXT: s_endpgm 5757; 5758; GFX12-CU-LABEL: flat_system_monotonic_seq_cst_cmpxchg: 5759; GFX12-CU: ; %bb.0: ; %entry 5760; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5761; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 5762; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 5763; GFX12-CU-NEXT: s_wait_kmcnt 0x0 5764; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 5765; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 5766; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5767; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 5768; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 5769; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 5770; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 5771; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 5772; GFX12-CU-NEXT: s_wait_samplecnt 0x0 5773; GFX12-CU-NEXT: s_wait_storecnt 0x0 5774; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 5775; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 5776; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 5777; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 5778; GFX12-CU-NEXT: s_endpgm 5779 ptr %out, i32 %in, i32 %old) { 5780entry: 5781 %gep = getelementptr i32, ptr %out, i32 4 5782 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic seq_cst 5783 ret void 5784} 5785 5786define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg( 5787; GFX7-LABEL: flat_system_acquire_seq_cst_cmpxchg: 5788; GFX7: ; %bb.0: ; 
%entry 5789; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 5790; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5791; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 5792; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 5793; GFX7-NEXT: s_mov_b64 s[10:11], 16 5794; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5795; GFX7-NEXT: s_mov_b32 s4, s8 5796; GFX7-NEXT: s_mov_b32 s5, s9 5797; GFX7-NEXT: s_mov_b32 s9, s10 5798; GFX7-NEXT: s_mov_b32 s8, s11 5799; GFX7-NEXT: s_add_u32 s4, s4, s9 5800; GFX7-NEXT: s_addc_u32 s8, s5, s8 5801; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5802; GFX7-NEXT: s_mov_b32 s5, s8 5803; GFX7-NEXT: v_mov_b32_e32 v2, s7 5804; GFX7-NEXT: v_mov_b32_e32 v0, s6 5805; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5806; GFX7-NEXT: v_mov_b32_e32 v3, v0 5807; GFX7-NEXT: v_mov_b32_e32 v0, s4 5808; GFX7-NEXT: v_mov_b32_e32 v1, s5 5809; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5810; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5811; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5812; GFX7-NEXT: buffer_wbinvl1_vol 5813; GFX7-NEXT: s_endpgm 5814; 5815; GFX10-WGP-LABEL: flat_system_acquire_seq_cst_cmpxchg: 5816; GFX10-WGP: ; %bb.0: ; %entry 5817; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 5818; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5819; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 5820; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 5821; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 5822; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5823; GFX10-WGP-NEXT: s_mov_b32 s4, s8 5824; GFX10-WGP-NEXT: s_mov_b32 s5, s9 5825; GFX10-WGP-NEXT: s_mov_b32 s9, s10 5826; GFX10-WGP-NEXT: s_mov_b32 s8, s11 5827; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 5828; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 5829; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5830; GFX10-WGP-NEXT: s_mov_b32 s5, s8 5831; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 5832; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 5833; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5834; 
GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 5835; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 5836; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 5837; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5838; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5839; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5840; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5841; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5842; GFX10-WGP-NEXT: buffer_gl1_inv 5843; GFX10-WGP-NEXT: buffer_gl0_inv 5844; GFX10-WGP-NEXT: s_endpgm 5845; 5846; GFX10-CU-LABEL: flat_system_acquire_seq_cst_cmpxchg: 5847; GFX10-CU: ; %bb.0: ; %entry 5848; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 5849; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5850; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 5851; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 5852; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 5853; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5854; GFX10-CU-NEXT: s_mov_b32 s4, s8 5855; GFX10-CU-NEXT: s_mov_b32 s5, s9 5856; GFX10-CU-NEXT: s_mov_b32 s9, s10 5857; GFX10-CU-NEXT: s_mov_b32 s8, s11 5858; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 5859; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 5860; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 5861; GFX10-CU-NEXT: s_mov_b32 s5, s8 5862; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 5863; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 5864; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5865; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 5866; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 5867; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 5868; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5869; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5870; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5871; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5872; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5873; GFX10-CU-NEXT: buffer_gl1_inv 5874; GFX10-CU-NEXT: buffer_gl0_inv 5875; GFX10-CU-NEXT: s_endpgm 5876; 5877; SKIP-CACHE-INV-LABEL: flat_system_acquire_seq_cst_cmpxchg: 5878; SKIP-CACHE-INV: ; %bb.0: ; %entry 5879; SKIP-CACHE-INV-NEXT: s_mov_b64 
s[0:1], s[4:5] 5880; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 5881; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 5882; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 5883; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 5884; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5885; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 5886; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 5887; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 5888; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 5889; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 5890; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 5891; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 5892; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 5893; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 5894; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 5895; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5896; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 5897; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5898; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5899; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5900; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 5901; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5902; SKIP-CACHE-INV-NEXT: s_endpgm 5903; 5904; GFX90A-NOTTGSPLIT-LABEL: flat_system_acquire_seq_cst_cmpxchg: 5905; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 5906; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5907; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5908; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5909; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5910; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5911; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 5912; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5913; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5914; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 5915; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 5916; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 
5917; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 5918; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5919; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 5920; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 5921; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 5922; 5923; GFX90A-TGSPLIT-LABEL: flat_system_acquire_seq_cst_cmpxchg: 5924; GFX90A-TGSPLIT: ; %bb.0: ; %entry 5925; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 5926; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 5927; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 5928; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5929; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 5930; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 5931; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5932; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5933; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 5934; GFX90A-TGSPLIT-NEXT: buffer_wbl2 5935; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5936; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 5937; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5938; GFX90A-TGSPLIT-NEXT: buffer_invl2 5939; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 5940; GFX90A-TGSPLIT-NEXT: s_endpgm 5941; 5942; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_seq_cst_cmpxchg: 5943; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 5944; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5945; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5946; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5947; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5948; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5949; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 5950; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5951; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5952; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 5953; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5954; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 5955; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 5956; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5957; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 5958; GFX940-NOTTGSPLIT-NEXT: s_endpgm 5959; 5960; GFX940-TGSPLIT-LABEL: flat_system_acquire_seq_cst_cmpxchg: 5961; GFX940-TGSPLIT: ; %bb.0: ; %entry 5962; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5963; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 5964; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 5965; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 5966; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 5967; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 5968; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5969; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 5970; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 5971; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 5972; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5973; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 5974; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 5975; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 5976; GFX940-TGSPLIT-NEXT: s_endpgm 5977; 5978; GFX11-WGP-LABEL: flat_system_acquire_seq_cst_cmpxchg: 5979; GFX11-WGP: ; %bb.0: ; %entry 5980; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 5981; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 5982; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 5983; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5984; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 5985; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 5986; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 5987; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 5988; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 5989; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 5990; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 5991; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5992; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 5993; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 5994; GFX11-WGP-NEXT: 
s_waitcnt_vscnt null, 0x0 5995; GFX11-WGP-NEXT: buffer_gl1_inv 5996; GFX11-WGP-NEXT: buffer_gl0_inv 5997; GFX11-WGP-NEXT: s_endpgm 5998; 5999; GFX11-CU-LABEL: flat_system_acquire_seq_cst_cmpxchg: 6000; GFX11-CU: ; %bb.0: ; %entry 6001; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6002; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6003; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6004; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6005; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 6006; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 6007; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6008; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 6009; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 6010; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 6011; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6012; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6013; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 6014; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6015; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6016; GFX11-CU-NEXT: buffer_gl1_inv 6017; GFX11-CU-NEXT: buffer_gl0_inv 6018; GFX11-CU-NEXT: s_endpgm 6019; 6020; GFX12-WGP-LABEL: flat_system_acquire_seq_cst_cmpxchg: 6021; GFX12-WGP: ; %bb.0: ; %entry 6022; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6023; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6024; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6025; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6026; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 6027; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 6028; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6029; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 6030; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 6031; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 6032; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 6033; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 6034; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 6035; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6036; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 6037; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 6038; GFX12-WGP-NEXT: 
s_wait_storecnt_dscnt 0x0 6039; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 6040; GFX12-WGP-NEXT: s_endpgm 6041; 6042; GFX12-CU-LABEL: flat_system_acquire_seq_cst_cmpxchg: 6043; GFX12-CU: ; %bb.0: ; %entry 6044; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6045; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6046; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6047; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6048; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 6049; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 6050; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6051; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 6052; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 6053; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 6054; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 6055; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 6056; GFX12-CU-NEXT: s_wait_samplecnt 0x0 6057; GFX12-CU-NEXT: s_wait_storecnt 0x0 6058; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 6059; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 6060; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 6061; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 6062; GFX12-CU-NEXT: s_endpgm 6063 ptr %out, i32 %in, i32 %old) { 6064entry: 6065 %gep = getelementptr i32, ptr %out, i32 4 6066 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire seq_cst 6067 ret void 6068} 6069 6070define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg( 6071; GFX7-LABEL: flat_system_release_seq_cst_cmpxchg: 6072; GFX7: ; %bb.0: ; %entry 6073; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 6074; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 6075; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 6076; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 6077; GFX7-NEXT: s_mov_b64 s[10:11], 16 6078; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6079; GFX7-NEXT: s_mov_b32 s4, s8 6080; GFX7-NEXT: s_mov_b32 s5, s9 6081; GFX7-NEXT: s_mov_b32 s9, s10 6082; GFX7-NEXT: s_mov_b32 s8, s11 6083; GFX7-NEXT: s_add_u32 s4, s4, s9 6084; GFX7-NEXT: s_addc_u32 s8, s5, s8 6085; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def 
$sgpr4_sgpr5 6086; GFX7-NEXT: s_mov_b32 s5, s8 6087; GFX7-NEXT: v_mov_b32_e32 v2, s7 6088; GFX7-NEXT: v_mov_b32_e32 v0, s6 6089; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6090; GFX7-NEXT: v_mov_b32_e32 v3, v0 6091; GFX7-NEXT: v_mov_b32_e32 v0, s4 6092; GFX7-NEXT: v_mov_b32_e32 v1, s5 6093; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6094; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6095; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6096; GFX7-NEXT: buffer_wbinvl1_vol 6097; GFX7-NEXT: s_endpgm 6098; 6099; GFX10-WGP-LABEL: flat_system_release_seq_cst_cmpxchg: 6100; GFX10-WGP: ; %bb.0: ; %entry 6101; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 6102; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 6103; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 6104; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 6105; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 6106; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6107; GFX10-WGP-NEXT: s_mov_b32 s4, s8 6108; GFX10-WGP-NEXT: s_mov_b32 s5, s9 6109; GFX10-WGP-NEXT: s_mov_b32 s9, s10 6110; GFX10-WGP-NEXT: s_mov_b32 s8, s11 6111; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 6112; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 6113; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 6114; GFX10-WGP-NEXT: s_mov_b32 s5, s8 6115; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 6116; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 6117; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6118; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 6119; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 6120; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 6121; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6122; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6123; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6124; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6125; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6126; GFX10-WGP-NEXT: buffer_gl1_inv 6127; GFX10-WGP-NEXT: buffer_gl0_inv 6128; GFX10-WGP-NEXT: s_endpgm 6129; 6130; GFX10-CU-LABEL: flat_system_release_seq_cst_cmpxchg: 6131; 
GFX10-CU: ; %bb.0: ; %entry 6132; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 6133; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 6134; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 6135; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 6136; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 6137; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6138; GFX10-CU-NEXT: s_mov_b32 s4, s8 6139; GFX10-CU-NEXT: s_mov_b32 s5, s9 6140; GFX10-CU-NEXT: s_mov_b32 s9, s10 6141; GFX10-CU-NEXT: s_mov_b32 s8, s11 6142; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 6143; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 6144; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 6145; GFX10-CU-NEXT: s_mov_b32 s5, s8 6146; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 6147; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 6148; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6149; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 6150; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 6151; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 6152; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6153; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6154; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6155; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6156; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6157; GFX10-CU-NEXT: buffer_gl1_inv 6158; GFX10-CU-NEXT: buffer_gl0_inv 6159; GFX10-CU-NEXT: s_endpgm 6160; 6161; SKIP-CACHE-INV-LABEL: flat_system_release_seq_cst_cmpxchg: 6162; SKIP-CACHE-INV: ; %bb.0: ; %entry 6163; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 6164; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 6165; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 6166; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 6167; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 6168; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6169; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 6170; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 6171; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 6172; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 6173; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 6174; SKIP-CACHE-INV-NEXT: s_addc_u32 
s4, s1, s4 6175; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 6176; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 6177; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 6178; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 6179; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6180; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 6181; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6182; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6183; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6184; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6185; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6186; SKIP-CACHE-INV-NEXT: s_endpgm 6187; 6188; GFX90A-NOTTGSPLIT-LABEL: flat_system_release_seq_cst_cmpxchg: 6189; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6190; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6191; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6192; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6193; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6194; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6195; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 6196; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6197; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 6198; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 6199; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 6200; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6201; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 6202; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6203; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 6204; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6205; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6206; 6207; GFX90A-TGSPLIT-LABEL: flat_system_release_seq_cst_cmpxchg: 6208; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6209; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6210; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6211; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 
0xc 6212; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6213; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6214; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 6215; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6216; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 6217; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 6218; GFX90A-TGSPLIT-NEXT: buffer_wbl2 6219; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6220; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 6221; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6222; GFX90A-TGSPLIT-NEXT: buffer_invl2 6223; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6224; GFX90A-TGSPLIT-NEXT: s_endpgm 6225; 6226; GFX940-NOTTGSPLIT-LABEL: flat_system_release_seq_cst_cmpxchg: 6227; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6228; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6229; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6230; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6231; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6232; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6233; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 6234; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6235; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 6236; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 6237; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 6238; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6239; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 6240; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6241; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 6242; GFX940-NOTTGSPLIT-NEXT: s_endpgm 6243; 6244; GFX940-TGSPLIT-LABEL: flat_system_release_seq_cst_cmpxchg: 6245; GFX940-TGSPLIT: ; %bb.0: ; %entry 6246; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6247; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6248; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6249; 
GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6250; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6251; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 6252; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6253; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 6254; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 6255; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 6256; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6257; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 6258; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6259; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 6260; GFX940-TGSPLIT-NEXT: s_endpgm 6261; 6262; GFX11-WGP-LABEL: flat_system_release_seq_cst_cmpxchg: 6263; GFX11-WGP: ; %bb.0: ; %entry 6264; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6265; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6266; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6267; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6268; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 6269; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 6270; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6271; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 6272; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 6273; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 6274; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6275; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6276; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 6277; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6278; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6279; GFX11-WGP-NEXT: buffer_gl1_inv 6280; GFX11-WGP-NEXT: buffer_gl0_inv 6281; GFX11-WGP-NEXT: s_endpgm 6282; 6283; GFX11-CU-LABEL: flat_system_release_seq_cst_cmpxchg: 6284; GFX11-CU: ; %bb.0: ; %entry 6285; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6286; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6287; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6288; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6289; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 6290; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 6291; GFX11-CU-NEXT: ; 
kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6292; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 6293; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 6294; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 6295; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6296; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6297; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 6298; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6299; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6300; GFX11-CU-NEXT: buffer_gl1_inv 6301; GFX11-CU-NEXT: buffer_gl0_inv 6302; GFX11-CU-NEXT: s_endpgm 6303; 6304; GFX12-WGP-LABEL: flat_system_release_seq_cst_cmpxchg: 6305; GFX12-WGP: ; %bb.0: ; %entry 6306; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6307; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6308; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6309; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6310; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 6311; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 6312; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6313; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 6314; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 6315; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 6316; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 6317; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 6318; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 6319; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6320; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 6321; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 6322; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 6323; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 6324; GFX12-WGP-NEXT: s_endpgm 6325; 6326; GFX12-CU-LABEL: flat_system_release_seq_cst_cmpxchg: 6327; GFX12-CU: ; %bb.0: ; %entry 6328; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6329; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6330; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6331; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6332; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 6333; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 6334; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def 
$vgpr2_vgpr3 killed $exec 6335; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 6336; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 6337; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 6338; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 6339; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 6340; GFX12-CU-NEXT: s_wait_samplecnt 0x0 6341; GFX12-CU-NEXT: s_wait_storecnt 0x0 6342; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 6343; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 6344; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 6345; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 6346; GFX12-CU-NEXT: s_endpgm 6347 ptr %out, i32 %in, i32 %old) { 6348entry: 6349 %gep = getelementptr i32, ptr %out, i32 4 6350 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release seq_cst 6351 ret void 6352} 6353 6354define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg( 6355; GFX7-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: 6356; GFX7: ; %bb.0: ; %entry 6357; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 6358; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 6359; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 6360; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 6361; GFX7-NEXT: s_mov_b64 s[10:11], 16 6362; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6363; GFX7-NEXT: s_mov_b32 s4, s8 6364; GFX7-NEXT: s_mov_b32 s5, s9 6365; GFX7-NEXT: s_mov_b32 s9, s10 6366; GFX7-NEXT: s_mov_b32 s8, s11 6367; GFX7-NEXT: s_add_u32 s4, s4, s9 6368; GFX7-NEXT: s_addc_u32 s8, s5, s8 6369; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 6370; GFX7-NEXT: s_mov_b32 s5, s8 6371; GFX7-NEXT: v_mov_b32_e32 v2, s7 6372; GFX7-NEXT: v_mov_b32_e32 v0, s6 6373; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6374; GFX7-NEXT: v_mov_b32_e32 v3, v0 6375; GFX7-NEXT: v_mov_b32_e32 v0, s4 6376; GFX7-NEXT: v_mov_b32_e32 v1, s5 6377; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6378; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6379; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6380; GFX7-NEXT: buffer_wbinvl1_vol 6381; GFX7-NEXT: s_endpgm 6382; 6383; 
GFX10-WGP-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: 6384; GFX10-WGP: ; %bb.0: ; %entry 6385; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 6386; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 6387; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 6388; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 6389; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 6390; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6391; GFX10-WGP-NEXT: s_mov_b32 s4, s8 6392; GFX10-WGP-NEXT: s_mov_b32 s5, s9 6393; GFX10-WGP-NEXT: s_mov_b32 s9, s10 6394; GFX10-WGP-NEXT: s_mov_b32 s8, s11 6395; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 6396; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 6397; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 6398; GFX10-WGP-NEXT: s_mov_b32 s5, s8 6399; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 6400; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 6401; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6402; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 6403; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 6404; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 6405; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6406; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6407; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6408; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6409; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6410; GFX10-WGP-NEXT: buffer_gl1_inv 6411; GFX10-WGP-NEXT: buffer_gl0_inv 6412; GFX10-WGP-NEXT: s_endpgm 6413; 6414; GFX10-CU-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: 6415; GFX10-CU: ; %bb.0: ; %entry 6416; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 6417; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 6418; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 6419; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 6420; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 6421; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6422; GFX10-CU-NEXT: s_mov_b32 s4, s8 6423; GFX10-CU-NEXT: s_mov_b32 s5, s9 6424; GFX10-CU-NEXT: s_mov_b32 s9, s10 6425; GFX10-CU-NEXT: s_mov_b32 s8, s11 6426; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 6427; GFX10-CU-NEXT: 
s_addc_u32 s8, s5, s8 6428; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 6429; GFX10-CU-NEXT: s_mov_b32 s5, s8 6430; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 6431; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 6432; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6433; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 6434; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 6435; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 6436; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6437; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6438; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6439; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6440; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6441; GFX10-CU-NEXT: buffer_gl1_inv 6442; GFX10-CU-NEXT: buffer_gl0_inv 6443; GFX10-CU-NEXT: s_endpgm 6444; 6445; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: 6446; SKIP-CACHE-INV: ; %bb.0: ; %entry 6447; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 6448; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 6449; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 6450; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 6451; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 6452; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6453; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 6454; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 6455; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 6456; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 6457; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 6458; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 6459; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 6460; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 6461; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 6462; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 6463; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6464; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 6465; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6466; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6467; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6468; 
SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6469; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6470; SKIP-CACHE-INV-NEXT: s_endpgm 6471; 6472; GFX90A-NOTTGSPLIT-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: 6473; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6474; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6475; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6476; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6477; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6478; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6479; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 6480; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6481; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 6482; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 6483; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 6484; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6485; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 6486; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6487; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 6488; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6489; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6490; 6491; GFX90A-TGSPLIT-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: 6492; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6493; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6494; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6495; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6496; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6497; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6498; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 6499; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6500; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 6501; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 6502; GFX90A-TGSPLIT-NEXT: buffer_wbl2 6503; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6504; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 
offset:16 6505; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6506; GFX90A-TGSPLIT-NEXT: buffer_invl2 6507; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6508; GFX90A-TGSPLIT-NEXT: s_endpgm 6509; 6510; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: 6511; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6512; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6513; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6514; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6515; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6516; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6517; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 6518; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6519; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 6520; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 6521; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 6522; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6523; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 6524; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6525; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 6526; GFX940-NOTTGSPLIT-NEXT: s_endpgm 6527; 6528; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: 6529; GFX940-TGSPLIT: ; %bb.0: ; %entry 6530; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6531; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6532; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6533; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6534; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6535; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 6536; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6537; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 6538; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 6539; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 6540; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6541; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 6542; 
GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6543; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 6544; GFX940-TGSPLIT-NEXT: s_endpgm 6545; 6546; GFX11-WGP-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: 6547; GFX11-WGP: ; %bb.0: ; %entry 6548; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6549; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6550; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6551; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6552; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 6553; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 6554; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6555; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 6556; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 6557; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 6558; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6559; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6560; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 6561; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6562; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6563; GFX11-WGP-NEXT: buffer_gl1_inv 6564; GFX11-WGP-NEXT: buffer_gl0_inv 6565; GFX11-WGP-NEXT: s_endpgm 6566; 6567; GFX11-CU-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: 6568; GFX11-CU: ; %bb.0: ; %entry 6569; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6570; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6571; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6572; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6573; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 6574; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 6575; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6576; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 6577; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 6578; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 6579; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6580; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6581; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 6582; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6583; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6584; GFX11-CU-NEXT: buffer_gl1_inv 6585; GFX11-CU-NEXT: buffer_gl0_inv 
6586; GFX11-CU-NEXT: s_endpgm 6587; 6588; GFX12-WGP-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: 6589; GFX12-WGP: ; %bb.0: ; %entry 6590; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6591; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6592; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6593; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6594; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 6595; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 6596; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6597; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 6598; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 6599; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 6600; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 6601; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 6602; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 6603; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6604; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 6605; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 6606; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 6607; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 6608; GFX12-WGP-NEXT: s_endpgm 6609; 6610; GFX12-CU-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: 6611; GFX12-CU: ; %bb.0: ; %entry 6612; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6613; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6614; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6615; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6616; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 6617; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 6618; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6619; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 6620; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 6621; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 6622; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 6623; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 6624; GFX12-CU-NEXT: s_wait_samplecnt 0x0 6625; GFX12-CU-NEXT: s_wait_storecnt 0x0 6626; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 6627; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 6628; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 6629; 
GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 6630; GFX12-CU-NEXT: s_endpgm 6631 ptr %out, i32 %in, i32 %old) { 6632entry: 6633 %gep = getelementptr i32, ptr %out, i32 4 6634 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel seq_cst 6635 ret void 6636} 6637 6638define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg( 6639; GFX7-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: 6640; GFX7: ; %bb.0: ; %entry 6641; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 6642; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 6643; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 6644; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 6645; GFX7-NEXT: s_mov_b64 s[10:11], 16 6646; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6647; GFX7-NEXT: s_mov_b32 s4, s8 6648; GFX7-NEXT: s_mov_b32 s5, s9 6649; GFX7-NEXT: s_mov_b32 s9, s10 6650; GFX7-NEXT: s_mov_b32 s8, s11 6651; GFX7-NEXT: s_add_u32 s4, s4, s9 6652; GFX7-NEXT: s_addc_u32 s8, s5, s8 6653; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 6654; GFX7-NEXT: s_mov_b32 s5, s8 6655; GFX7-NEXT: v_mov_b32_e32 v2, s7 6656; GFX7-NEXT: v_mov_b32_e32 v0, s6 6657; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6658; GFX7-NEXT: v_mov_b32_e32 v3, v0 6659; GFX7-NEXT: v_mov_b32_e32 v0, s4 6660; GFX7-NEXT: v_mov_b32_e32 v1, s5 6661; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6662; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6663; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6664; GFX7-NEXT: buffer_wbinvl1_vol 6665; GFX7-NEXT: s_endpgm 6666; 6667; GFX10-WGP-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: 6668; GFX10-WGP: ; %bb.0: ; %entry 6669; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 6670; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 6671; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 6672; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 6673; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 6674; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6675; GFX10-WGP-NEXT: s_mov_b32 s4, s8 6676; GFX10-WGP-NEXT: s_mov_b32 s5, s9 6677; GFX10-WGP-NEXT: s_mov_b32 s9, s10 6678; 
GFX10-WGP-NEXT: s_mov_b32 s8, s11 6679; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 6680; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 6681; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 6682; GFX10-WGP-NEXT: s_mov_b32 s5, s8 6683; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 6684; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 6685; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6686; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 6687; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 6688; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 6689; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6690; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6691; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6692; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6693; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6694; GFX10-WGP-NEXT: buffer_gl1_inv 6695; GFX10-WGP-NEXT: buffer_gl0_inv 6696; GFX10-WGP-NEXT: s_endpgm 6697; 6698; GFX10-CU-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: 6699; GFX10-CU: ; %bb.0: ; %entry 6700; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 6701; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 6702; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 6703; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 6704; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 6705; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6706; GFX10-CU-NEXT: s_mov_b32 s4, s8 6707; GFX10-CU-NEXT: s_mov_b32 s5, s9 6708; GFX10-CU-NEXT: s_mov_b32 s9, s10 6709; GFX10-CU-NEXT: s_mov_b32 s8, s11 6710; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 6711; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 6712; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 6713; GFX10-CU-NEXT: s_mov_b32 s5, s8 6714; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 6715; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 6716; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6717; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 6718; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 6719; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 6720; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6721; GFX10-CU-NEXT: 
s_waitcnt_vscnt null, 0x0 6722; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6723; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6724; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 6725; GFX10-CU-NEXT: buffer_gl1_inv 6726; GFX10-CU-NEXT: buffer_gl0_inv 6727; GFX10-CU-NEXT: s_endpgm 6728; 6729; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: 6730; SKIP-CACHE-INV: ; %bb.0: ; %entry 6731; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 6732; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 6733; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 6734; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 6735; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 6736; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 6737; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 6738; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 6739; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 6740; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 6741; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 6742; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 6743; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 6744; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 6745; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 6746; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 6747; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6748; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 6749; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 6750; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 6751; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6752; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 6753; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6754; SKIP-CACHE-INV-NEXT: s_endpgm 6755; 6756; GFX90A-NOTTGSPLIT-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: 6757; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 6758; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6759; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6760; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6761; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6762; 
GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6763; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 6764; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6765; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 6766; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 6767; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 6768; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6769; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 6770; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6771; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 6772; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 6773; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 6774; 6775; GFX90A-TGSPLIT-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: 6776; GFX90A-TGSPLIT: ; %bb.0: ; %entry 6777; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 6778; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 6779; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 6780; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6781; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 6782; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 6783; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6784; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 6785; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 6786; GFX90A-TGSPLIT-NEXT: buffer_wbl2 6787; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6788; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 6789; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6790; GFX90A-TGSPLIT-NEXT: buffer_invl2 6791; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 6792; GFX90A-TGSPLIT-NEXT: s_endpgm 6793; 6794; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: 6795; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 6796; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6797; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6798; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6799; GFX940-NOTTGSPLIT-NEXT: 
s_waitcnt lgkmcnt(0) 6800; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6801; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 6802; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6803; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 6804; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 6805; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 6806; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6807; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 6808; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6809; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 6810; GFX940-NOTTGSPLIT-NEXT: s_endpgm 6811; 6812; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: 6813; GFX940-TGSPLIT: ; %bb.0: ; %entry 6814; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 6815; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 6816; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 6817; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 6818; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 6819; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 6820; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6821; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 6822; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 6823; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 6824; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6825; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 6826; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 6827; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 6828; GFX940-TGSPLIT-NEXT: s_endpgm 6829; 6830; GFX11-WGP-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: 6831; GFX11-WGP: ; %bb.0: ; %entry 6832; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6833; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6834; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6835; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6836; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 6837; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 6838; GFX11-WGP-NEXT: ; 
kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6839; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 6840; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 6841; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 6842; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6843; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6844; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 6845; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 6846; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 6847; GFX11-WGP-NEXT: buffer_gl1_inv 6848; GFX11-WGP-NEXT: buffer_gl0_inv 6849; GFX11-WGP-NEXT: s_endpgm 6850; 6851; GFX11-CU-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: 6852; GFX11-CU: ; %bb.0: ; %entry 6853; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6854; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6855; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6856; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6857; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 6858; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 6859; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6860; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 6861; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 6862; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 6863; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6864; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6865; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 6866; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 6867; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 6868; GFX11-CU-NEXT: buffer_gl1_inv 6869; GFX11-CU-NEXT: buffer_gl0_inv 6870; GFX11-CU-NEXT: s_endpgm 6871; 6872; GFX12-WGP-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: 6873; GFX12-WGP: ; %bb.0: ; %entry 6874; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6875; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 6876; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 6877; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 6878; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 6879; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 6880; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6881; GFX12-WGP-NEXT: v_mov_b32_e32 
v3, v0 6882; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 6883; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 6884; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 6885; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 6886; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 6887; GFX12-WGP-NEXT: s_wait_storecnt 0x0 6888; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 6889; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 6890; GFX12-WGP-NEXT: s_wait_storecnt_dscnt 0x0 6891; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 6892; GFX12-WGP-NEXT: s_endpgm 6893; 6894; GFX12-CU-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: 6895; GFX12-CU: ; %bb.0: ; %entry 6896; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 6897; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 6898; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 6899; GFX12-CU-NEXT: s_wait_kmcnt 0x0 6900; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 6901; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 6902; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6903; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 6904; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 6905; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 6906; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 6907; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 6908; GFX12-CU-NEXT: s_wait_samplecnt 0x0 6909; GFX12-CU-NEXT: s_wait_storecnt 0x0 6910; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 6911; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 6912; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0 6913; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 6914; GFX12-CU-NEXT: s_endpgm 6915 ptr %out, i32 %in, i32 %old) { 6916entry: 6917 %gep = getelementptr i32, ptr %out, i32 4 6918 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst seq_cst 6919 ret void 6920} 6921 6922define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg( 6923; GFX7-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: 6924; GFX7: ; %bb.0: ; %entry 6925; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 6926; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6927; 
GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 6928; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 6929; GFX7-NEXT: s_mov_b64 s[12:13], 16 6930; GFX7-NEXT: s_waitcnt lgkmcnt(0) 6931; GFX7-NEXT: s_mov_b32 s6, s4 6932; GFX7-NEXT: s_mov_b32 s7, s5 6933; GFX7-NEXT: s_mov_b32 s11, s12 6934; GFX7-NEXT: s_mov_b32 s10, s13 6935; GFX7-NEXT: s_add_u32 s6, s6, s11 6936; GFX7-NEXT: s_addc_u32 s10, s7, s10 6937; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 6938; GFX7-NEXT: s_mov_b32 s7, s10 6939; GFX7-NEXT: v_mov_b32_e32 v2, s9 6940; GFX7-NEXT: v_mov_b32_e32 v0, s8 6941; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6942; GFX7-NEXT: v_mov_b32_e32 v3, v0 6943; GFX7-NEXT: v_mov_b32_e32 v0, s6 6944; GFX7-NEXT: v_mov_b32_e32 v1, s7 6945; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6946; GFX7-NEXT: v_mov_b32_e32 v0, s4 6947; GFX7-NEXT: v_mov_b32_e32 v1, s5 6948; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6949; GFX7-NEXT: flat_store_dword v[0:1], v2 6950; GFX7-NEXT: s_endpgm 6951; 6952; GFX10-WGP-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: 6953; GFX10-WGP: ; %bb.0: ; %entry 6954; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 6955; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6956; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 6957; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 6958; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 6959; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 6960; GFX10-WGP-NEXT: s_mov_b32 s6, s4 6961; GFX10-WGP-NEXT: s_mov_b32 s7, s5 6962; GFX10-WGP-NEXT: s_mov_b32 s11, s12 6963; GFX10-WGP-NEXT: s_mov_b32 s10, s13 6964; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 6965; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 6966; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 6967; GFX10-WGP-NEXT: s_mov_b32 s7, s10 6968; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 6969; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 6970; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 6971; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 6972; 
GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 6973; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 6974; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 6975; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 6976; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 6977; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 6978; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 6979; GFX10-WGP-NEXT: s_endpgm 6980; 6981; GFX10-CU-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: 6982; GFX10-CU: ; %bb.0: ; %entry 6983; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 6984; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 6985; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 6986; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 6987; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 6988; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 6989; GFX10-CU-NEXT: s_mov_b32 s6, s4 6990; GFX10-CU-NEXT: s_mov_b32 s7, s5 6991; GFX10-CU-NEXT: s_mov_b32 s11, s12 6992; GFX10-CU-NEXT: s_mov_b32 s10, s13 6993; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 6994; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 6995; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 6996; GFX10-CU-NEXT: s_mov_b32 s7, s10 6997; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 6998; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 6999; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7000; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 7001; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 7002; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7003; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7004; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 7005; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 7006; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7007; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 7008; GFX10-CU-NEXT: s_endpgm 7009; 7010; SKIP-CACHE-INV-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: 7011; SKIP-CACHE-INV: ; %bb.0: ; %entry 7012; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7013; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7014; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7015; 
SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7016; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 7017; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7018; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 7019; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 7020; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 7021; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 7022; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 7023; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 7024; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 7025; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7026; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 7027; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 7028; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7029; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 7030; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 7031; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 7032; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7033; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 7034; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 7035; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7036; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 7037; SKIP-CACHE-INV-NEXT: s_endpgm 7038; 7039; GFX90A-NOTTGSPLIT-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: 7040; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7041; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7042; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7043; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7044; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7045; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7046; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 7047; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7048; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7049; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7050; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 7051; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 
v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7052; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7053; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 7054; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7055; 7056; GFX90A-TGSPLIT-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: 7057; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7058; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7059; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7060; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7061; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7062; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7063; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 7064; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7065; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7066; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7067; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 7068; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7069; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7070; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 7071; GFX90A-TGSPLIT-NEXT: s_endpgm 7072; 7073; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: 7074; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7075; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7076; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7077; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7078; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7079; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7080; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 7081; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7082; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7083; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7084; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 7085; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7086; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 7087; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 7088; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7089; 7090; GFX940-TGSPLIT-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: 7091; GFX940-TGSPLIT: ; %bb.0: ; %entry 7092; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7093; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7094; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7095; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7096; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7097; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 7098; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7099; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7100; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7101; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 7102; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7103; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7104; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 7105; GFX940-TGSPLIT-NEXT: s_endpgm 7106; 7107; GFX11-WGP-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: 7108; GFX11-WGP: ; %bb.0: ; %entry 7109; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7110; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7111; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7112; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7113; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 7114; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 7115; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7116; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 7117; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 7118; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 7119; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 7120; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 7121; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 7122; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7123; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 7124; GFX11-WGP-NEXT: s_endpgm 7125; 7126; GFX11-CU-LABEL: 
flat_system_monotonic_monotonic_ret_cmpxchg: 7127; GFX11-CU: ; %bb.0: ; %entry 7128; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7129; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7130; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7131; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7132; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 7133; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 7134; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7135; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 7136; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 7137; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 7138; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 7139; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 7140; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 7141; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7142; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 7143; GFX11-CU-NEXT: s_endpgm 7144; 7145; GFX12-WGP-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: 7146; GFX12-WGP: ; %bb.0: ; %entry 7147; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7148; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7149; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7150; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7151; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 7152; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 7153; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7154; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 7155; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 7156; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 7157; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 7158; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 7159; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 7160; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 7161; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 7162; GFX12-WGP-NEXT: s_endpgm 7163; 7164; GFX12-CU-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: 7165; GFX12-CU: ; %bb.0: ; %entry 7166; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7167; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7168; 
GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7169; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7170; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 7171; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 7172; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7173; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 7174; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 7175; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 7176; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 7177; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 7178; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 7179; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 7180; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 7181; GFX12-CU-NEXT: s_endpgm 7182 ptr %out, i32 %in, i32 %old) { 7183entry: 7184 %gep = getelementptr i32, ptr %out, i32 4 7185 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic monotonic 7186 %val0 = extractvalue { i32, i1 } %val, 0 7187 store i32 %val0, ptr %out, align 4 7188 ret void 7189} 7190 7191define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg( 7192; GFX7-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: 7193; GFX7: ; %bb.0: ; %entry 7194; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 7195; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7196; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7197; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7198; GFX7-NEXT: s_mov_b64 s[12:13], 16 7199; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7200; GFX7-NEXT: s_mov_b32 s6, s4 7201; GFX7-NEXT: s_mov_b32 s7, s5 7202; GFX7-NEXT: s_mov_b32 s11, s12 7203; GFX7-NEXT: s_mov_b32 s10, s13 7204; GFX7-NEXT: s_add_u32 s6, s6, s11 7205; GFX7-NEXT: s_addc_u32 s10, s7, s10 7206; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7207; GFX7-NEXT: s_mov_b32 s7, s10 7208; GFX7-NEXT: v_mov_b32_e32 v2, s9 7209; GFX7-NEXT: v_mov_b32_e32 v0, s8 7210; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7211; GFX7-NEXT: v_mov_b32_e32 v3, v0 7212; GFX7-NEXT: v_mov_b32_e32 v0, s6 7213; GFX7-NEXT: v_mov_b32_e32 v1, s7 7214; 
GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7215; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7216; GFX7-NEXT: buffer_wbinvl1_vol 7217; GFX7-NEXT: v_mov_b32_e32 v0, s4 7218; GFX7-NEXT: v_mov_b32_e32 v1, s5 7219; GFX7-NEXT: flat_store_dword v[0:1], v2 7220; GFX7-NEXT: s_endpgm 7221; 7222; GFX10-WGP-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: 7223; GFX10-WGP: ; %bb.0: ; %entry 7224; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 7225; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7226; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 7227; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 7228; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 7229; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7230; GFX10-WGP-NEXT: s_mov_b32 s6, s4 7231; GFX10-WGP-NEXT: s_mov_b32 s7, s5 7232; GFX10-WGP-NEXT: s_mov_b32 s11, s12 7233; GFX10-WGP-NEXT: s_mov_b32 s10, s13 7234; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 7235; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 7236; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7237; GFX10-WGP-NEXT: s_mov_b32 s7, s10 7238; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 7239; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 7240; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7241; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 7242; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 7243; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7244; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7245; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7246; GFX10-WGP-NEXT: buffer_gl1_inv 7247; GFX10-WGP-NEXT: buffer_gl0_inv 7248; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 7249; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 7250; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 7251; GFX10-WGP-NEXT: s_endpgm 7252; 7253; GFX10-CU-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: 7254; GFX10-CU: ; %bb.0: ; %entry 7255; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 7256; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7257; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 7258; GFX10-CU-NEXT: s_load_dword s8, 
s[6:7], 0xc 7259; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 7260; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 7261; GFX10-CU-NEXT: s_mov_b32 s6, s4 7262; GFX10-CU-NEXT: s_mov_b32 s7, s5 7263; GFX10-CU-NEXT: s_mov_b32 s11, s12 7264; GFX10-CU-NEXT: s_mov_b32 s10, s13 7265; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 7266; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 7267; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7268; GFX10-CU-NEXT: s_mov_b32 s7, s10 7269; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 7270; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 7271; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7272; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 7273; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 7274; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7275; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7276; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7277; GFX10-CU-NEXT: buffer_gl1_inv 7278; GFX10-CU-NEXT: buffer_gl0_inv 7279; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 7280; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 7281; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 7282; GFX10-CU-NEXT: s_endpgm 7283; 7284; SKIP-CACHE-INV-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: 7285; SKIP-CACHE-INV: ; %bb.0: ; %entry 7286; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7287; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7288; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7289; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7290; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 7291; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7292; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 7293; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 7294; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 7295; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 7296; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 7297; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 7298; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 7299; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7300; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 7301; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 7302; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7303; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 7304; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 7305; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 7306; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7307; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7308; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 7309; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 7310; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 7311; SKIP-CACHE-INV-NEXT: s_endpgm 7312; 7313; GFX90A-NOTTGSPLIT-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: 7314; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7315; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7316; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7317; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7318; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7319; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7320; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 7321; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7322; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7323; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7324; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 7325; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7326; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 7327; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 7328; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7329; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 7330; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7331; 7332; GFX90A-TGSPLIT-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: 7333; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7334; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7335; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7336; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7337; 
GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7338; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7339; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 7340; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7341; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7342; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7343; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 7344; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7345; GFX90A-TGSPLIT-NEXT: buffer_invl2 7346; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 7347; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7348; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 7349; GFX90A-TGSPLIT-NEXT: s_endpgm 7350; 7351; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: 7352; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7353; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7354; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7355; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7356; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7357; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7358; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 7359; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7360; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7361; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7362; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 7363; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7364; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 7365; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7366; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 7367; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7368; 7369; GFX940-TGSPLIT-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: 7370; GFX940-TGSPLIT: ; %bb.0: ; %entry 7371; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7372; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7373; 
GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7374; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7375; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7376; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 7377; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7378; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7379; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7380; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 7381; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7382; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 7383; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7384; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 7385; GFX940-TGSPLIT-NEXT: s_endpgm 7386; 7387; GFX11-WGP-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: 7388; GFX11-WGP: ; %bb.0: ; %entry 7389; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7390; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7391; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7392; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7393; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 7394; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 7395; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7396; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 7397; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 7398; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 7399; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 7400; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7401; GFX11-WGP-NEXT: buffer_gl1_inv 7402; GFX11-WGP-NEXT: buffer_gl0_inv 7403; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 7404; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 7405; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 7406; GFX11-WGP-NEXT: s_endpgm 7407; 7408; GFX11-CU-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: 7409; GFX11-CU: ; %bb.0: ; %entry 7410; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7411; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7412; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7413; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7414; 
GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 7415; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 7416; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7417; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 7418; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 7419; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 7420; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 7421; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7422; GFX11-CU-NEXT: buffer_gl1_inv 7423; GFX11-CU-NEXT: buffer_gl0_inv 7424; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 7425; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 7426; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 7427; GFX11-CU-NEXT: s_endpgm 7428; 7429; GFX12-WGP-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: 7430; GFX12-WGP: ; %bb.0: ; %entry 7431; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7432; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7433; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7434; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7435; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 7436; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 7437; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7438; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 7439; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 7440; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 7441; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 7442; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 7443; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 7444; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 7445; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 7446; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 7447; GFX12-WGP-NEXT: s_endpgm 7448; 7449; GFX12-CU-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: 7450; GFX12-CU: ; %bb.0: ; %entry 7451; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7452; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7453; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7454; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7455; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 7456; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 7457; 
GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7458; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 7459; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 7460; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 7461; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 7462; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 7463; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 7464; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 7465; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 7466; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 7467; GFX12-CU-NEXT: s_endpgm 7468 ptr %out, i32 %in, i32 %old) { 7469entry: 7470 %gep = getelementptr i32, ptr %out, i32 4 7471 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire monotonic 7472 %val0 = extractvalue { i32, i1 } %val, 0 7473 store i32 %val0, ptr %out, align 4 7474 ret void 7475} 7476 7477define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg( 7478; GFX7-LABEL: flat_system_release_monotonic_ret_cmpxchg: 7479; GFX7: ; %bb.0: ; %entry 7480; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 7481; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7482; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7483; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7484; GFX7-NEXT: s_mov_b64 s[12:13], 16 7485; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7486; GFX7-NEXT: s_mov_b32 s6, s4 7487; GFX7-NEXT: s_mov_b32 s7, s5 7488; GFX7-NEXT: s_mov_b32 s11, s12 7489; GFX7-NEXT: s_mov_b32 s10, s13 7490; GFX7-NEXT: s_add_u32 s6, s6, s11 7491; GFX7-NEXT: s_addc_u32 s10, s7, s10 7492; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7493; GFX7-NEXT: s_mov_b32 s7, s10 7494; GFX7-NEXT: v_mov_b32_e32 v2, s9 7495; GFX7-NEXT: v_mov_b32_e32 v0, s8 7496; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7497; GFX7-NEXT: v_mov_b32_e32 v3, v0 7498; GFX7-NEXT: v_mov_b32_e32 v0, s6 7499; GFX7-NEXT: v_mov_b32_e32 v1, s7 7500; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7501; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7502; GFX7-NEXT: 
v_mov_b32_e32 v0, s4 7503; GFX7-NEXT: v_mov_b32_e32 v1, s5 7504; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7505; GFX7-NEXT: flat_store_dword v[0:1], v2 7506; GFX7-NEXT: s_endpgm 7507; 7508; GFX10-WGP-LABEL: flat_system_release_monotonic_ret_cmpxchg: 7509; GFX10-WGP: ; %bb.0: ; %entry 7510; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 7511; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7512; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 7513; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 7514; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 7515; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7516; GFX10-WGP-NEXT: s_mov_b32 s6, s4 7517; GFX10-WGP-NEXT: s_mov_b32 s7, s5 7518; GFX10-WGP-NEXT: s_mov_b32 s11, s12 7519; GFX10-WGP-NEXT: s_mov_b32 s10, s13 7520; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 7521; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 7522; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7523; GFX10-WGP-NEXT: s_mov_b32 s7, s10 7524; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 7525; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 7526; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7527; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 7528; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 7529; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7530; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7531; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 7532; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7533; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 7534; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 7535; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7536; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 7537; GFX10-WGP-NEXT: s_endpgm 7538; 7539; GFX10-CU-LABEL: flat_system_release_monotonic_ret_cmpxchg: 7540; GFX10-CU: ; %bb.0: ; %entry 7541; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 7542; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7543; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 7544; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 7545; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 7546; GFX10-CU-NEXT: 
s_waitcnt lgkmcnt(0) 7547; GFX10-CU-NEXT: s_mov_b32 s6, s4 7548; GFX10-CU-NEXT: s_mov_b32 s7, s5 7549; GFX10-CU-NEXT: s_mov_b32 s11, s12 7550; GFX10-CU-NEXT: s_mov_b32 s10, s13 7551; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 7552; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 7553; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7554; GFX10-CU-NEXT: s_mov_b32 s7, s10 7555; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 7556; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 7557; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7558; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 7559; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 7560; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7561; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7562; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 7563; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7564; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 7565; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 7566; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7567; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 7568; GFX10-CU-NEXT: s_endpgm 7569; 7570; SKIP-CACHE-INV-LABEL: flat_system_release_monotonic_ret_cmpxchg: 7571; SKIP-CACHE-INV: ; %bb.0: ; %entry 7572; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7573; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7574; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7575; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7576; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 7577; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7578; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 7579; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 7580; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 7581; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 7582; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 7583; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 7584; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 7585; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7586; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 7587; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 7588; 
SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7589; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 7590; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 7591; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 7592; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7593; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7594; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 7595; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 7596; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7597; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 7598; SKIP-CACHE-INV-NEXT: s_endpgm 7599; 7600; GFX90A-NOTTGSPLIT-LABEL: flat_system_release_monotonic_ret_cmpxchg: 7601; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7602; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7603; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7604; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7605; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7606; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7607; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 7608; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7609; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7610; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7611; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 7612; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7613; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 7614; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7615; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7616; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 7617; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7618; 7619; GFX90A-TGSPLIT-LABEL: flat_system_release_monotonic_ret_cmpxchg: 7620; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7621; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7622; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7623; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7624; 
GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7625; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7626; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 7627; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7628; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7629; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7630; GFX90A-TGSPLIT-NEXT: buffer_wbl2 7631; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7632; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 7633; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7634; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7635; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 7636; GFX90A-TGSPLIT-NEXT: s_endpgm 7637; 7638; GFX940-NOTTGSPLIT-LABEL: flat_system_release_monotonic_ret_cmpxchg: 7639; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7640; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7641; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7642; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7643; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7644; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7645; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 7646; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7647; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7648; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7649; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 7650; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7651; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 7652; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7653; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7654; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 7655; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7656; 7657; GFX940-TGSPLIT-LABEL: flat_system_release_monotonic_ret_cmpxchg: 7658; GFX940-TGSPLIT: ; %bb.0: ; %entry 7659; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 
0x0 7660; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7661; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7662; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7663; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7664; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 7665; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7666; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7667; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7668; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 7669; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7670; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 7671; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7672; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7673; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 7674; GFX940-TGSPLIT-NEXT: s_endpgm 7675; 7676; GFX11-WGP-LABEL: flat_system_release_monotonic_ret_cmpxchg: 7677; GFX11-WGP: ; %bb.0: ; %entry 7678; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7679; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7680; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7681; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7682; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 7683; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 7684; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7685; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 7686; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 7687; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 7688; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7689; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 7690; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 7691; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 7692; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 7693; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7694; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 7695; GFX11-WGP-NEXT: s_endpgm 7696; 7697; GFX11-CU-LABEL: flat_system_release_monotonic_ret_cmpxchg: 7698; GFX11-CU: ; %bb.0: ; %entry 7699; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 
7700; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7701; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7702; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 7703; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 7704; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 7705; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7706; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 7707; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 7708; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 7709; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7710; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 7711; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 7712; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 7713; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 7714; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7715; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 7716; GFX11-CU-NEXT: s_endpgm 7717; 7718; GFX12-WGP-LABEL: flat_system_release_monotonic_ret_cmpxchg: 7719; GFX12-WGP: ; %bb.0: ; %entry 7720; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7721; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7722; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7723; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 7724; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 7725; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 7726; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7727; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 7728; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 7729; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 7730; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 7731; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 7732; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 7733; GFX12-WGP-NEXT: s_wait_storecnt 0x0 7734; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 7735; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 7736; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 7737; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 7738; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 7739; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 7740; GFX12-WGP-NEXT: s_endpgm 7741; 7742; GFX12-CU-LABEL: 
flat_system_release_monotonic_ret_cmpxchg: 7743; GFX12-CU: ; %bb.0: ; %entry 7744; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7745; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 7746; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 7747; GFX12-CU-NEXT: s_wait_kmcnt 0x0 7748; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 7749; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 7750; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7751; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 7752; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 7753; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 7754; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 7755; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 7756; GFX12-CU-NEXT: s_wait_samplecnt 0x0 7757; GFX12-CU-NEXT: s_wait_storecnt 0x0 7758; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 7759; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 7760; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 7761; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 7762; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 7763; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 7764; GFX12-CU-NEXT: s_endpgm 7765 ptr %out, i32 %in, i32 %old) { 7766entry: 7767 %gep = getelementptr i32, ptr %out, i32 4 7768 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release monotonic 7769 %val0 = extractvalue { i32, i1 } %val, 0 7770 store i32 %val0, ptr %out, align 4 7771 ret void 7772} 7773 7774define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( 7775; GFX7-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: 7776; GFX7: ; %bb.0: ; %entry 7777; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 7778; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7779; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 7780; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 7781; GFX7-NEXT: s_mov_b64 s[12:13], 16 7782; GFX7-NEXT: s_waitcnt lgkmcnt(0) 7783; GFX7-NEXT: s_mov_b32 s6, s4 7784; GFX7-NEXT: s_mov_b32 s7, s5 7785; GFX7-NEXT: s_mov_b32 s11, s12 7786; GFX7-NEXT: s_mov_b32 s10, s13 7787; GFX7-NEXT: s_add_u32 s6, s6, s11 7788; GFX7-NEXT: 
s_addc_u32 s10, s7, s10 7789; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7790; GFX7-NEXT: s_mov_b32 s7, s10 7791; GFX7-NEXT: v_mov_b32_e32 v2, s9 7792; GFX7-NEXT: v_mov_b32_e32 v0, s8 7793; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7794; GFX7-NEXT: v_mov_b32_e32 v3, v0 7795; GFX7-NEXT: v_mov_b32_e32 v0, s6 7796; GFX7-NEXT: v_mov_b32_e32 v1, s7 7797; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7798; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7799; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7800; GFX7-NEXT: buffer_wbinvl1_vol 7801; GFX7-NEXT: v_mov_b32_e32 v0, s4 7802; GFX7-NEXT: v_mov_b32_e32 v1, s5 7803; GFX7-NEXT: flat_store_dword v[0:1], v2 7804; GFX7-NEXT: s_endpgm 7805; 7806; GFX10-WGP-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: 7807; GFX10-WGP: ; %bb.0: ; %entry 7808; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 7809; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7810; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 7811; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 7812; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 7813; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 7814; GFX10-WGP-NEXT: s_mov_b32 s6, s4 7815; GFX10-WGP-NEXT: s_mov_b32 s7, s5 7816; GFX10-WGP-NEXT: s_mov_b32 s11, s12 7817; GFX10-WGP-NEXT: s_mov_b32 s10, s13 7818; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 7819; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 7820; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7821; GFX10-WGP-NEXT: s_mov_b32 s7, s10 7822; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 7823; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 7824; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7825; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 7826; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 7827; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 7828; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7829; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 7830; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7831; GFX10-WGP-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) 7832; GFX10-WGP-NEXT: buffer_gl1_inv 7833; GFX10-WGP-NEXT: buffer_gl0_inv 7834; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 7835; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 7836; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 7837; GFX10-WGP-NEXT: s_endpgm 7838; 7839; GFX10-CU-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: 7840; GFX10-CU: ; %bb.0: ; %entry 7841; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 7842; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 7843; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 7844; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 7845; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 7846; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 7847; GFX10-CU-NEXT: s_mov_b32 s6, s4 7848; GFX10-CU-NEXT: s_mov_b32 s7, s5 7849; GFX10-CU-NEXT: s_mov_b32 s11, s12 7850; GFX10-CU-NEXT: s_mov_b32 s10, s13 7851; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 7852; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 7853; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 7854; GFX10-CU-NEXT: s_mov_b32 s7, s10 7855; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 7856; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 7857; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7858; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 7859; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 7860; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 7861; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7862; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 7863; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7864; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7865; GFX10-CU-NEXT: buffer_gl1_inv 7866; GFX10-CU-NEXT: buffer_gl0_inv 7867; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 7868; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 7869; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 7870; GFX10-CU-NEXT: s_endpgm 7871; 7872; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: 7873; SKIP-CACHE-INV: ; %bb.0: ; %entry 7874; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 7875; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 7876; 
SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 7877; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 7878; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 7879; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 7880; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 7881; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 7882; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 7883; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 7884; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 7885; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 7886; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 7887; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 7888; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 7889; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 7890; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7891; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 7892; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 7893; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 7894; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7895; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 7896; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7897; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 7898; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 7899; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 7900; SKIP-CACHE-INV-NEXT: s_endpgm 7901; 7902; GFX90A-NOTTGSPLIT-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: 7903; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 7904; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7905; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7906; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7907; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7908; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7909; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 7910; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7911; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7912; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7913; 
GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 7914; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7915; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 7916; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7917; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 7918; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 7919; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7920; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 7921; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 7922; 7923; GFX90A-TGSPLIT-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: 7924; GFX90A-TGSPLIT: ; %bb.0: ; %entry 7925; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 7926; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 7927; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 7928; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7929; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 7930; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 7931; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7932; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7933; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7934; GFX90A-TGSPLIT-NEXT: buffer_wbl2 7935; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7936; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 7937; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7938; GFX90A-TGSPLIT-NEXT: buffer_invl2 7939; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 7940; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 7941; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 7942; GFX90A-TGSPLIT-NEXT: s_endpgm 7943; 7944; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: 7945; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 7946; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7947; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7948; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7949; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7950; 
GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7951; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 7952; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7953; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7954; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7955; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 7956; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7957; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 7958; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7959; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 7960; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7961; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 7962; GFX940-NOTTGSPLIT-NEXT: s_endpgm 7963; 7964; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: 7965; GFX940-TGSPLIT: ; %bb.0: ; %entry 7966; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 7967; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 7968; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 7969; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 7970; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 7971; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 7972; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7973; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 7974; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7975; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 7976; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7977; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 7978; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 7979; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 7980; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 7981; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 7982; GFX940-TGSPLIT-NEXT: s_endpgm 7983; 7984; GFX11-WGP-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: 7985; GFX11-WGP: ; %bb.0: ; %entry 7986; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 7987; 
GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 7988; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 7989; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 7990; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 7991; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 7992; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 7993; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 7994; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 7995; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 7996; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 7997; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 7998; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 7999; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8000; GFX11-WGP-NEXT: buffer_gl1_inv 8001; GFX11-WGP-NEXT: buffer_gl0_inv 8002; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 8003; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 8004; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 8005; GFX11-WGP-NEXT: s_endpgm 8006; 8007; GFX11-CU-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: 8008; GFX11-CU: ; %bb.0: ; %entry 8009; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8010; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8011; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8012; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 8013; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 8014; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 8015; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8016; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 8017; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 8018; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 8019; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8020; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 8021; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 8022; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8023; GFX11-CU-NEXT: buffer_gl1_inv 8024; GFX11-CU-NEXT: buffer_gl0_inv 8025; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 8026; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 8027; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 8028; GFX11-CU-NEXT: s_endpgm 8029; 8030; GFX12-WGP-LABEL: 
flat_system_acq_rel_monotonic_ret_cmpxchg: 8031; GFX12-WGP: ; %bb.0: ; %entry 8032; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8033; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8034; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8035; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 8036; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 8037; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 8038; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8039; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 8040; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 8041; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 8042; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 8043; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8044; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 8045; GFX12-WGP-NEXT: s_wait_storecnt 0x0 8046; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 8047; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8048; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8049; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 8050; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 8051; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 8052; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 8053; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 8054; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 8055; GFX12-WGP-NEXT: s_endpgm 8056; 8057; GFX12-CU-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: 8058; GFX12-CU: ; %bb.0: ; %entry 8059; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8060; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8061; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8062; GFX12-CU-NEXT: s_wait_kmcnt 0x0 8063; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 8064; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 8065; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8066; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 8067; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 8068; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 8069; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 8070; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 8071; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8072; GFX12-CU-NEXT: s_wait_storecnt 0x0 
8073; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 8074; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8075; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 8076; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8077; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 8078; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 8079; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 8080; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 8081; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 8082; GFX12-CU-NEXT: s_endpgm 8083 ptr %out, i32 %in, i32 %old) { 8084entry: 8085 %gep = getelementptr i32, ptr %out, i32 4 8086 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel monotonic 8087 %val0 = extractvalue { i32, i1 } %val, 0 8088 store i32 %val0, ptr %out, align 4 8089 ret void 8090} 8091 8092define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( 8093; GFX7-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: 8094; GFX7: ; %bb.0: ; %entry 8095; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 8096; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8097; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 8098; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 8099; GFX7-NEXT: s_mov_b64 s[12:13], 16 8100; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8101; GFX7-NEXT: s_mov_b32 s6, s4 8102; GFX7-NEXT: s_mov_b32 s7, s5 8103; GFX7-NEXT: s_mov_b32 s11, s12 8104; GFX7-NEXT: s_mov_b32 s10, s13 8105; GFX7-NEXT: s_add_u32 s6, s6, s11 8106; GFX7-NEXT: s_addc_u32 s10, s7, s10 8107; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8108; GFX7-NEXT: s_mov_b32 s7, s10 8109; GFX7-NEXT: v_mov_b32_e32 v2, s9 8110; GFX7-NEXT: v_mov_b32_e32 v0, s8 8111; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8112; GFX7-NEXT: v_mov_b32_e32 v3, v0 8113; GFX7-NEXT: v_mov_b32_e32 v0, s6 8114; GFX7-NEXT: v_mov_b32_e32 v1, s7 8115; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8116; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8117; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8118; GFX7-NEXT: buffer_wbinvl1_vol 8119; GFX7-NEXT: 
v_mov_b32_e32 v0, s4 8120; GFX7-NEXT: v_mov_b32_e32 v1, s5 8121; GFX7-NEXT: flat_store_dword v[0:1], v2 8122; GFX7-NEXT: s_endpgm 8123; 8124; GFX10-WGP-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: 8125; GFX10-WGP: ; %bb.0: ; %entry 8126; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 8127; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8128; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 8129; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 8130; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 8131; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 8132; GFX10-WGP-NEXT: s_mov_b32 s6, s4 8133; GFX10-WGP-NEXT: s_mov_b32 s7, s5 8134; GFX10-WGP-NEXT: s_mov_b32 s11, s12 8135; GFX10-WGP-NEXT: s_mov_b32 s10, s13 8136; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 8137; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 8138; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8139; GFX10-WGP-NEXT: s_mov_b32 s7, s10 8140; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 8141; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 8142; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8143; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 8144; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 8145; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 8146; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8147; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 8148; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8149; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8150; GFX10-WGP-NEXT: buffer_gl1_inv 8151; GFX10-WGP-NEXT: buffer_gl0_inv 8152; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 8153; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 8154; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 8155; GFX10-WGP-NEXT: s_endpgm 8156; 8157; GFX10-CU-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: 8158; GFX10-CU: ; %bb.0: ; %entry 8159; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 8160; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8161; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 8162; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 8163; GFX10-CU-NEXT: s_mov_b64 s[12:13], 
16 8164; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 8165; GFX10-CU-NEXT: s_mov_b32 s6, s4 8166; GFX10-CU-NEXT: s_mov_b32 s7, s5 8167; GFX10-CU-NEXT: s_mov_b32 s11, s12 8168; GFX10-CU-NEXT: s_mov_b32 s10, s13 8169; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 8170; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 8171; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8172; GFX10-CU-NEXT: s_mov_b32 s7, s10 8173; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 8174; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 8175; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8176; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 8177; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 8178; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 8179; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8180; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 8181; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8182; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8183; GFX10-CU-NEXT: buffer_gl1_inv 8184; GFX10-CU-NEXT: buffer_gl0_inv 8185; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 8186; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 8187; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 8188; GFX10-CU-NEXT: s_endpgm 8189; 8190; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: 8191; SKIP-CACHE-INV: ; %bb.0: ; %entry 8192; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 8193; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 8194; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 8195; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 8196; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 8197; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 8198; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 8199; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 8200; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 8201; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 8202; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 8203; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 8204; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 8205; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 8206; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 8207; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 8208; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8209; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 8210; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 8211; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 8212; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8213; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8214; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8215; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 8216; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 8217; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 8218; SKIP-CACHE-INV-NEXT: s_endpgm 8219; 8220; GFX90A-NOTTGSPLIT-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: 8221; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 8222; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8223; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8224; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8225; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8226; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8227; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 8228; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8229; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 8230; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 8231; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 8232; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8233; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 8234; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8235; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 8236; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 8237; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 8238; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 8239; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 8240; 8241; GFX90A-TGSPLIT-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: 8242; GFX90A-TGSPLIT: ; %bb.0: ; 
%entry 8243; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8244; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8245; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8246; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8247; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8248; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 8249; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8250; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 8251; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 8252; GFX90A-TGSPLIT-NEXT: buffer_wbl2 8253; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8254; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 8255; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8256; GFX90A-TGSPLIT-NEXT: buffer_invl2 8257; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 8258; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 8259; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 8260; GFX90A-TGSPLIT-NEXT: s_endpgm 8261; 8262; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: 8263; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 8264; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8265; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8266; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8267; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8268; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8269; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 8270; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8271; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 8272; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 8273; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 8274; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8275; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 8276; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8277; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 8278; 
GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 8279; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 8280; GFX940-NOTTGSPLIT-NEXT: s_endpgm 8281; 8282; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: 8283; GFX940-TGSPLIT: ; %bb.0: ; %entry 8284; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8285; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8286; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8287; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8288; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8289; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 8290; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8291; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 8292; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 8293; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 8294; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8295; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 8296; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8297; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 8298; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 8299; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 8300; GFX940-TGSPLIT-NEXT: s_endpgm 8301; 8302; GFX11-WGP-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: 8303; GFX11-WGP: ; %bb.0: ; %entry 8304; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8305; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8306; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8307; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 8308; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 8309; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 8310; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8311; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 8312; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 8313; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 8314; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8315; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 8316; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 
glc 8317; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8318; GFX11-WGP-NEXT: buffer_gl1_inv 8319; GFX11-WGP-NEXT: buffer_gl0_inv 8320; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 8321; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 8322; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 8323; GFX11-WGP-NEXT: s_endpgm 8324; 8325; GFX11-CU-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: 8326; GFX11-CU: ; %bb.0: ; %entry 8327; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8328; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8329; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8330; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 8331; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 8332; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 8333; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8334; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 8335; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 8336; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 8337; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8338; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 8339; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 8340; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8341; GFX11-CU-NEXT: buffer_gl1_inv 8342; GFX11-CU-NEXT: buffer_gl0_inv 8343; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 8344; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 8345; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 8346; GFX11-CU-NEXT: s_endpgm 8347; 8348; GFX12-WGP-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: 8349; GFX12-WGP: ; %bb.0: ; %entry 8350; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8351; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8352; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8353; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 8354; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 8355; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 8356; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8357; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 8358; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 8359; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 8360; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 8361; 
GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8362; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 8363; GFX12-WGP-NEXT: s_wait_storecnt 0x0 8364; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 8365; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8366; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8367; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 8368; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 8369; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 8370; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 8371; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 8372; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 8373; GFX12-WGP-NEXT: s_endpgm 8374; 8375; GFX12-CU-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: 8376; GFX12-CU: ; %bb.0: ; %entry 8377; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8378; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8379; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8380; GFX12-CU-NEXT: s_wait_kmcnt 0x0 8381; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 8382; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 8383; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8384; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 8385; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 8386; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 8387; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 8388; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 8389; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8390; GFX12-CU-NEXT: s_wait_storecnt 0x0 8391; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 8392; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8393; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 8394; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8395; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 8396; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 8397; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 8398; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 8399; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 8400; GFX12-CU-NEXT: s_endpgm 8401 ptr %out, i32 %in, i32 %old) { 8402entry: 8403 %gep = getelementptr i32, ptr %out, i32 4 8404 %val = cmpxchg volatile ptr %gep, 
i32 %old, i32 %in seq_cst monotonic 8405 %val0 = extractvalue { i32, i1 } %val, 0 8406 store i32 %val0, ptr %out, align 4 8407 ret void 8408} 8409 8410define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg( 8411; GFX7-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: 8412; GFX7: ; %bb.0: ; %entry 8413; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 8414; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8415; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 8416; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 8417; GFX7-NEXT: s_mov_b64 s[12:13], 16 8418; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8419; GFX7-NEXT: s_mov_b32 s6, s4 8420; GFX7-NEXT: s_mov_b32 s7, s5 8421; GFX7-NEXT: s_mov_b32 s11, s12 8422; GFX7-NEXT: s_mov_b32 s10, s13 8423; GFX7-NEXT: s_add_u32 s6, s6, s11 8424; GFX7-NEXT: s_addc_u32 s10, s7, s10 8425; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8426; GFX7-NEXT: s_mov_b32 s7, s10 8427; GFX7-NEXT: v_mov_b32_e32 v2, s9 8428; GFX7-NEXT: v_mov_b32_e32 v0, s8 8429; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8430; GFX7-NEXT: v_mov_b32_e32 v3, v0 8431; GFX7-NEXT: v_mov_b32_e32 v0, s6 8432; GFX7-NEXT: v_mov_b32_e32 v1, s7 8433; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8434; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8435; GFX7-NEXT: buffer_wbinvl1_vol 8436; GFX7-NEXT: v_mov_b32_e32 v0, s4 8437; GFX7-NEXT: v_mov_b32_e32 v1, s5 8438; GFX7-NEXT: flat_store_dword v[0:1], v2 8439; GFX7-NEXT: s_endpgm 8440; 8441; GFX10-WGP-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: 8442; GFX10-WGP: ; %bb.0: ; %entry 8443; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 8444; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8445; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 8446; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 8447; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 8448; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 8449; GFX10-WGP-NEXT: s_mov_b32 s6, s4 8450; GFX10-WGP-NEXT: s_mov_b32 s7, s5 8451; GFX10-WGP-NEXT: s_mov_b32 s11, s12 8452; GFX10-WGP-NEXT: 
s_mov_b32 s10, s13 8453; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 8454; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 8455; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8456; GFX10-WGP-NEXT: s_mov_b32 s7, s10 8457; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 8458; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 8459; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8460; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 8461; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 8462; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 8463; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8464; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8465; GFX10-WGP-NEXT: buffer_gl1_inv 8466; GFX10-WGP-NEXT: buffer_gl0_inv 8467; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 8468; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 8469; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 8470; GFX10-WGP-NEXT: s_endpgm 8471; 8472; GFX10-CU-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: 8473; GFX10-CU: ; %bb.0: ; %entry 8474; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 8475; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8476; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 8477; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 8478; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 8479; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 8480; GFX10-CU-NEXT: s_mov_b32 s6, s4 8481; GFX10-CU-NEXT: s_mov_b32 s7, s5 8482; GFX10-CU-NEXT: s_mov_b32 s11, s12 8483; GFX10-CU-NEXT: s_mov_b32 s10, s13 8484; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 8485; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 8486; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8487; GFX10-CU-NEXT: s_mov_b32 s7, s10 8488; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 8489; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 8490; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8491; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 8492; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 8493; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 8494; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8495; 
GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8496; GFX10-CU-NEXT: buffer_gl1_inv 8497; GFX10-CU-NEXT: buffer_gl0_inv 8498; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 8499; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 8500; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 8501; GFX10-CU-NEXT: s_endpgm 8502; 8503; SKIP-CACHE-INV-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: 8504; SKIP-CACHE-INV: ; %bb.0: ; %entry 8505; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 8506; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 8507; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 8508; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 8509; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 8510; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 8511; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 8512; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 8513; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 8514; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 8515; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 8516; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 8517; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 8518; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 8519; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 8520; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 8521; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8522; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 8523; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 8524; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 8525; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8526; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8527; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 8528; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 8529; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 8530; SKIP-CACHE-INV-NEXT: s_endpgm 8531; 8532; GFX90A-NOTTGSPLIT-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: 8533; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 8534; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8535; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 
0x8 8536; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8537; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8538; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8539; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 8540; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8541; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 8542; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 8543; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 8544; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8545; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 8546; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 8547; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 8548; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 8549; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 8550; 8551; GFX90A-TGSPLIT-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: 8552; GFX90A-TGSPLIT: ; %bb.0: ; %entry 8553; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8554; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8555; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8556; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8557; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8558; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 8559; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8560; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 8561; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 8562; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 8563; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8564; GFX90A-TGSPLIT-NEXT: buffer_invl2 8565; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 8566; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 8567; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 8568; GFX90A-TGSPLIT-NEXT: s_endpgm 8569; 8570; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: 8571; GFX940-NOTTGSPLIT: ; %bb.0: ; 
%entry 8572; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8573; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8574; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8575; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8576; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8577; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 8578; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8579; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 8580; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 8581; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 8582; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8583; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 8584; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 8585; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 8586; GFX940-NOTTGSPLIT-NEXT: s_endpgm 8587; 8588; GFX940-TGSPLIT-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: 8589; GFX940-TGSPLIT: ; %bb.0: ; %entry 8590; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8591; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8592; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8593; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8594; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8595; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 8596; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8597; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 8598; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 8599; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 8600; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8601; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 8602; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 8603; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 8604; GFX940-TGSPLIT-NEXT: s_endpgm 8605; 8606; GFX11-WGP-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: 8607; GFX11-WGP: ; %bb.0: ; %entry 8608; GFX11-WGP-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x0 8609; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8610; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8611; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 8612; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 8613; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 8614; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8615; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 8616; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 8617; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 8618; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 8619; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8620; GFX11-WGP-NEXT: buffer_gl1_inv 8621; GFX11-WGP-NEXT: buffer_gl0_inv 8622; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 8623; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 8624; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 8625; GFX11-WGP-NEXT: s_endpgm 8626; 8627; GFX11-CU-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: 8628; GFX11-CU: ; %bb.0: ; %entry 8629; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8630; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8631; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8632; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 8633; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 8634; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 8635; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8636; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 8637; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 8638; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 8639; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 8640; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8641; GFX11-CU-NEXT: buffer_gl1_inv 8642; GFX11-CU-NEXT: buffer_gl0_inv 8643; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 8644; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 8645; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 8646; GFX11-CU-NEXT: s_endpgm 8647; 8648; GFX12-WGP-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: 8649; GFX12-WGP: ; %bb.0: ; %entry 8650; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8651; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 
0x8 8652; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8653; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 8654; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 8655; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 8656; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8657; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 8658; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 8659; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 8660; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8661; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 8662; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 8663; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 8664; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 8665; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 8666; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 8667; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 8668; GFX12-WGP-NEXT: s_endpgm 8669; 8670; GFX12-CU-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: 8671; GFX12-CU: ; %bb.0: ; %entry 8672; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8673; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8674; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8675; GFX12-CU-NEXT: s_wait_kmcnt 0x0 8676; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 8677; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 8678; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8679; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 8680; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 8681; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 8682; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8683; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 8684; GFX12-CU-NEXT: s_wait_samplecnt 0x0 8685; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 8686; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 8687; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 8688; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 8689; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 8690; GFX12-CU-NEXT: s_endpgm 8691 ptr %out, i32 %in, i32 %old) { 8692entry: 8693 %gep = getelementptr i32, ptr %out, i32 4 8694 %val = cmpxchg 
volatile ptr %gep, i32 %old, i32 %in monotonic acquire 8695 %val0 = extractvalue { i32, i1 } %val, 0 8696 store i32 %val0, ptr %out, align 4 8697 ret void 8698} 8699 8700define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg( 8701; GFX7-LABEL: flat_system_acquire_acquire_ret_cmpxchg: 8702; GFX7: ; %bb.0: ; %entry 8703; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 8704; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8705; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 8706; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 8707; GFX7-NEXT: s_mov_b64 s[12:13], 16 8708; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8709; GFX7-NEXT: s_mov_b32 s6, s4 8710; GFX7-NEXT: s_mov_b32 s7, s5 8711; GFX7-NEXT: s_mov_b32 s11, s12 8712; GFX7-NEXT: s_mov_b32 s10, s13 8713; GFX7-NEXT: s_add_u32 s6, s6, s11 8714; GFX7-NEXT: s_addc_u32 s10, s7, s10 8715; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8716; GFX7-NEXT: s_mov_b32 s7, s10 8717; GFX7-NEXT: v_mov_b32_e32 v2, s9 8718; GFX7-NEXT: v_mov_b32_e32 v0, s8 8719; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8720; GFX7-NEXT: v_mov_b32_e32 v3, v0 8721; GFX7-NEXT: v_mov_b32_e32 v0, s6 8722; GFX7-NEXT: v_mov_b32_e32 v1, s7 8723; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8724; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8725; GFX7-NEXT: buffer_wbinvl1_vol 8726; GFX7-NEXT: v_mov_b32_e32 v0, s4 8727; GFX7-NEXT: v_mov_b32_e32 v1, s5 8728; GFX7-NEXT: flat_store_dword v[0:1], v2 8729; GFX7-NEXT: s_endpgm 8730; 8731; GFX10-WGP-LABEL: flat_system_acquire_acquire_ret_cmpxchg: 8732; GFX10-WGP: ; %bb.0: ; %entry 8733; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 8734; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8735; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 8736; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 8737; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 8738; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 8739; GFX10-WGP-NEXT: s_mov_b32 s6, s4 8740; GFX10-WGP-NEXT: s_mov_b32 s7, s5 8741; GFX10-WGP-NEXT: s_mov_b32 s11, s12 8742; 
GFX10-WGP-NEXT: s_mov_b32 s10, s13 8743; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 8744; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 8745; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8746; GFX10-WGP-NEXT: s_mov_b32 s7, s10 8747; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 8748; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 8749; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8750; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 8751; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 8752; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 8753; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8754; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8755; GFX10-WGP-NEXT: buffer_gl1_inv 8756; GFX10-WGP-NEXT: buffer_gl0_inv 8757; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 8758; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 8759; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 8760; GFX10-WGP-NEXT: s_endpgm 8761; 8762; GFX10-CU-LABEL: flat_system_acquire_acquire_ret_cmpxchg: 8763; GFX10-CU: ; %bb.0: ; %entry 8764; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 8765; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8766; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 8767; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 8768; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 8769; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 8770; GFX10-CU-NEXT: s_mov_b32 s6, s4 8771; GFX10-CU-NEXT: s_mov_b32 s7, s5 8772; GFX10-CU-NEXT: s_mov_b32 s11, s12 8773; GFX10-CU-NEXT: s_mov_b32 s10, s13 8774; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 8775; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 8776; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 8777; GFX10-CU-NEXT: s_mov_b32 s7, s10 8778; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 8779; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 8780; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8781; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 8782; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 8783; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 8784; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 
8785; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8786; GFX10-CU-NEXT: buffer_gl1_inv 8787; GFX10-CU-NEXT: buffer_gl0_inv 8788; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 8789; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 8790; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 8791; GFX10-CU-NEXT: s_endpgm 8792; 8793; SKIP-CACHE-INV-LABEL: flat_system_acquire_acquire_ret_cmpxchg: 8794; SKIP-CACHE-INV: ; %bb.0: ; %entry 8795; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 8796; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 8797; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 8798; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 8799; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 8800; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 8801; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 8802; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 8803; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 8804; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 8805; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 8806; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 8807; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 8808; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 8809; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 8810; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 8811; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8812; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 8813; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 8814; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 8815; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 8816; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8817; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 8818; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 8819; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 8820; SKIP-CACHE-INV-NEXT: s_endpgm 8821; 8822; GFX90A-NOTTGSPLIT-LABEL: flat_system_acquire_acquire_ret_cmpxchg: 8823; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 8824; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8825; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, 
s[8:9], 0x8 8826; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8827; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8828; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8829; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 8830; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8831; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 8832; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 8833; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 8834; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8835; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 8836; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 8837; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 8838; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 8839; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 8840; 8841; GFX90A-TGSPLIT-LABEL: flat_system_acquire_acquire_ret_cmpxchg: 8842; GFX90A-TGSPLIT: ; %bb.0: ; %entry 8843; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 8844; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 8845; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 8846; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8847; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 8848; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 8849; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8850; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 8851; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 8852; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 8853; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8854; GFX90A-TGSPLIT-NEXT: buffer_invl2 8855; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 8856; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 8857; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 8858; GFX90A-TGSPLIT-NEXT: s_endpgm 8859; 8860; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_acquire_ret_cmpxchg: 8861; GFX940-NOTTGSPLIT: ; %bb.0: ; 
%entry 8862; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8863; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8864; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8865; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8866; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8867; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 8868; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8869; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 8870; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 8871; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 8872; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8873; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 8874; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 8875; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 8876; GFX940-NOTTGSPLIT-NEXT: s_endpgm 8877; 8878; GFX940-TGSPLIT-LABEL: flat_system_acquire_acquire_ret_cmpxchg: 8879; GFX940-TGSPLIT: ; %bb.0: ; %entry 8880; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 8881; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 8882; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 8883; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 8884; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 8885; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 8886; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8887; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 8888; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 8889; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 8890; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 8891; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 8892; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 8893; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 8894; GFX940-TGSPLIT-NEXT: s_endpgm 8895; 8896; GFX11-WGP-LABEL: flat_system_acquire_acquire_ret_cmpxchg: 8897; GFX11-WGP: ; %bb.0: ; %entry 8898; GFX11-WGP-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x0 8899; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 8900; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8901; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 8902; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 8903; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 8904; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8905; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 8906; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 8907; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 8908; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 8909; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8910; GFX11-WGP-NEXT: buffer_gl1_inv 8911; GFX11-WGP-NEXT: buffer_gl0_inv 8912; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 8913; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 8914; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 8915; GFX11-WGP-NEXT: s_endpgm 8916; 8917; GFX11-CU-LABEL: flat_system_acquire_acquire_ret_cmpxchg: 8918; GFX11-CU: ; %bb.0: ; %entry 8919; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8920; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8921; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8922; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 8923; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 8924; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 8925; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8926; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 8927; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 8928; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 8929; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 8930; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 8931; GFX11-CU-NEXT: buffer_gl1_inv 8932; GFX11-CU-NEXT: buffer_gl0_inv 8933; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 8934; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 8935; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 8936; GFX11-CU-NEXT: s_endpgm 8937; 8938; GFX12-WGP-LABEL: flat_system_acquire_acquire_ret_cmpxchg: 8939; GFX12-WGP: ; %bb.0: ; %entry 8940; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8941; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 
8942; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 8943; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 8944; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 8945; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 8946; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8947; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 8948; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 8949; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 8950; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8951; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 8952; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 8953; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 8954; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 8955; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 8956; GFX12-WGP-NEXT: s_endpgm 8957; 8958; GFX12-CU-LABEL: flat_system_acquire_acquire_ret_cmpxchg: 8959; GFX12-CU: ; %bb.0: ; %entry 8960; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 8961; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 8962; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 8963; GFX12-CU-NEXT: s_wait_kmcnt 0x0 8964; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 8965; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 8966; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 8967; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 8968; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 8969; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 8970; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 8971; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 8972; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 8973; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 8974; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 8975; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 8976; GFX12-CU-NEXT: s_endpgm 8977 ptr %out, i32 %in, i32 %old) { 8978entry: 8979 %gep = getelementptr i32, ptr %out, i32 4 8980 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire acquire 8981 %val0 = extractvalue { i32, i1 } %val, 0 8982 store i32 %val0, ptr %out, align 4 8983 ret void 8984} 8985 8986define 
amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( 8987; GFX7-LABEL: flat_system_release_acquire_ret_cmpxchg: 8988; GFX7: ; %bb.0: ; %entry 8989; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 8990; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 8991; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 8992; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 8993; GFX7-NEXT: s_mov_b64 s[12:13], 16 8994; GFX7-NEXT: s_waitcnt lgkmcnt(0) 8995; GFX7-NEXT: s_mov_b32 s6, s4 8996; GFX7-NEXT: s_mov_b32 s7, s5 8997; GFX7-NEXT: s_mov_b32 s11, s12 8998; GFX7-NEXT: s_mov_b32 s10, s13 8999; GFX7-NEXT: s_add_u32 s6, s6, s11 9000; GFX7-NEXT: s_addc_u32 s10, s7, s10 9001; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9002; GFX7-NEXT: s_mov_b32 s7, s10 9003; GFX7-NEXT: v_mov_b32_e32 v2, s9 9004; GFX7-NEXT: v_mov_b32_e32 v0, s8 9005; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9006; GFX7-NEXT: v_mov_b32_e32 v3, v0 9007; GFX7-NEXT: v_mov_b32_e32 v0, s6 9008; GFX7-NEXT: v_mov_b32_e32 v1, s7 9009; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9010; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9011; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9012; GFX7-NEXT: buffer_wbinvl1_vol 9013; GFX7-NEXT: v_mov_b32_e32 v0, s4 9014; GFX7-NEXT: v_mov_b32_e32 v1, s5 9015; GFX7-NEXT: flat_store_dword v[0:1], v2 9016; GFX7-NEXT: s_endpgm 9017; 9018; GFX10-WGP-LABEL: flat_system_release_acquire_ret_cmpxchg: 9019; GFX10-WGP: ; %bb.0: ; %entry 9020; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 9021; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9022; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 9023; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 9024; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 9025; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9026; GFX10-WGP-NEXT: s_mov_b32 s6, s4 9027; GFX10-WGP-NEXT: s_mov_b32 s7, s5 9028; GFX10-WGP-NEXT: s_mov_b32 s11, s12 9029; GFX10-WGP-NEXT: s_mov_b32 s10, s13 9030; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 9031; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 9032; 
GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9033; GFX10-WGP-NEXT: s_mov_b32 s7, s10 9034; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 9035; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 9036; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9037; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 9038; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 9039; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 9040; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9041; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9042; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9043; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9044; GFX10-WGP-NEXT: buffer_gl1_inv 9045; GFX10-WGP-NEXT: buffer_gl0_inv 9046; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 9047; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 9048; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 9049; GFX10-WGP-NEXT: s_endpgm 9050; 9051; GFX10-CU-LABEL: flat_system_release_acquire_ret_cmpxchg: 9052; GFX10-CU: ; %bb.0: ; %entry 9053; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 9054; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9055; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 9056; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 9057; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 9058; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 9059; GFX10-CU-NEXT: s_mov_b32 s6, s4 9060; GFX10-CU-NEXT: s_mov_b32 s7, s5 9061; GFX10-CU-NEXT: s_mov_b32 s11, s12 9062; GFX10-CU-NEXT: s_mov_b32 s10, s13 9063; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 9064; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 9065; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9066; GFX10-CU-NEXT: s_mov_b32 s7, s10 9067; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 9068; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 9069; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9070; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 9071; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 9072; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 9073; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9074; GFX10-CU-NEXT: s_waitcnt_vscnt null, 
0x0 9075; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9076; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9077; GFX10-CU-NEXT: buffer_gl1_inv 9078; GFX10-CU-NEXT: buffer_gl0_inv 9079; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 9080; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 9081; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 9082; GFX10-CU-NEXT: s_endpgm 9083; 9084; SKIP-CACHE-INV-LABEL: flat_system_release_acquire_ret_cmpxchg: 9085; SKIP-CACHE-INV: ; %bb.0: ; %entry 9086; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 9087; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 9088; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 9089; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 9090; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 9091; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 9092; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 9093; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 9094; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 9095; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 9096; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 9097; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 9098; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 9099; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 9100; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 9101; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 9102; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9103; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 9104; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 9105; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 9106; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9107; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9108; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9109; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 9110; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 9111; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 9112; SKIP-CACHE-INV-NEXT: s_endpgm 9113; 9114; GFX90A-NOTTGSPLIT-LABEL: flat_system_release_acquire_ret_cmpxchg: 9115; GFX90A-NOTTGSPLIT: ; %bb.0: ; 
%entry 9116; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9117; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9118; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9119; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9120; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9121; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 9122; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9123; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 9124; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 9125; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 9126; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9127; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 9128; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9129; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 9130; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 9131; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 9132; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 9133; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 9134; 9135; GFX90A-TGSPLIT-LABEL: flat_system_release_acquire_ret_cmpxchg: 9136; GFX90A-TGSPLIT: ; %bb.0: ; %entry 9137; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9138; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9139; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9140; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9141; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9142; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 9143; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9144; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 9145; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 9146; GFX90A-TGSPLIT-NEXT: buffer_wbl2 9147; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9148; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 9149; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9150; GFX90A-TGSPLIT-NEXT: buffer_invl2 9151; 
GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 9152; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 9153; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 9154; GFX90A-TGSPLIT-NEXT: s_endpgm 9155; 9156; GFX940-NOTTGSPLIT-LABEL: flat_system_release_acquire_ret_cmpxchg: 9157; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 9158; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9159; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9160; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9161; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9162; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9163; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 9164; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9165; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 9166; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 9167; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 9168; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9169; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 9170; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9171; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 9172; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 9173; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 9174; GFX940-NOTTGSPLIT-NEXT: s_endpgm 9175; 9176; GFX940-TGSPLIT-LABEL: flat_system_release_acquire_ret_cmpxchg: 9177; GFX940-TGSPLIT: ; %bb.0: ; %entry 9178; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9179; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9180; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9181; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9182; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9183; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 9184; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9185; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 9186; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 9187; GFX940-TGSPLIT-NEXT: 
buffer_wbl2 sc0 sc1 9188; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9189; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 9190; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9191; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 9192; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 9193; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 9194; GFX940-TGSPLIT-NEXT: s_endpgm 9195; 9196; GFX11-WGP-LABEL: flat_system_release_acquire_ret_cmpxchg: 9197; GFX11-WGP: ; %bb.0: ; %entry 9198; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9199; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9200; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9201; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 9202; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 9203; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 9204; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9205; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 9206; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 9207; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 9208; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9209; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9210; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 9211; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9212; GFX11-WGP-NEXT: buffer_gl1_inv 9213; GFX11-WGP-NEXT: buffer_gl0_inv 9214; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 9215; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 9216; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 9217; GFX11-WGP-NEXT: s_endpgm 9218; 9219; GFX11-CU-LABEL: flat_system_release_acquire_ret_cmpxchg: 9220; GFX11-CU: ; %bb.0: ; %entry 9221; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9222; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9223; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9224; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9225; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 9226; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 9227; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9228; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 9229; 
GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 9230; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 9231; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9232; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 9233; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 9234; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9235; GFX11-CU-NEXT: buffer_gl1_inv 9236; GFX11-CU-NEXT: buffer_gl0_inv 9237; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 9238; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 9239; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 9240; GFX11-CU-NEXT: s_endpgm 9241; 9242; GFX12-WGP-LABEL: flat_system_release_acquire_ret_cmpxchg: 9243; GFX12-WGP: ; %bb.0: ; %entry 9244; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9245; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9246; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9247; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9248; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 9249; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 9250; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9251; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 9252; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 9253; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 9254; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 9255; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9256; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9257; GFX12-WGP-NEXT: s_wait_storecnt 0x0 9258; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 9259; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9260; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9261; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9262; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 9263; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 9264; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 9265; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 9266; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 9267; GFX12-WGP-NEXT: s_endpgm 9268; 9269; GFX12-CU-LABEL: flat_system_release_acquire_ret_cmpxchg: 9270; GFX12-CU: ; %bb.0: ; %entry 9271; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9272; GFX12-CU-NEXT: s_load_b32 s3, 
s[4:5], 0x8 9273; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9274; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9275; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 9276; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 9277; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9278; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 9279; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 9280; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 9281; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 9282; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9283; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9284; GFX12-CU-NEXT: s_wait_storecnt 0x0 9285; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 9286; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9287; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9288; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9289; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 9290; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 9291; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 9292; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 9293; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 9294; GFX12-CU-NEXT: s_endpgm 9295 ptr %out, i32 %in, i32 %old) { 9296entry: 9297 %gep = getelementptr i32, ptr %out, i32 4 9298 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release acquire 9299 %val0 = extractvalue { i32, i1 } %val, 0 9300 store i32 %val0, ptr %out, align 4 9301 ret void 9302} 9303 9304define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( 9305; GFX7-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: 9306; GFX7: ; %bb.0: ; %entry 9307; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 9308; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9309; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 9310; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 9311; GFX7-NEXT: s_mov_b64 s[12:13], 16 9312; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9313; GFX7-NEXT: s_mov_b32 s6, s4 9314; GFX7-NEXT: s_mov_b32 s7, s5 9315; GFX7-NEXT: s_mov_b32 s11, s12 9316; GFX7-NEXT: s_mov_b32 s10, s13 9317; GFX7-NEXT: s_add_u32 s6, s6, s11 9318; GFX7-NEXT: s_addc_u32 s10, s7, s10 9319; GFX7-NEXT: ; 
kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9320; GFX7-NEXT: s_mov_b32 s7, s10 9321; GFX7-NEXT: v_mov_b32_e32 v2, s9 9322; GFX7-NEXT: v_mov_b32_e32 v0, s8 9323; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9324; GFX7-NEXT: v_mov_b32_e32 v3, v0 9325; GFX7-NEXT: v_mov_b32_e32 v0, s6 9326; GFX7-NEXT: v_mov_b32_e32 v1, s7 9327; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9328; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9329; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9330; GFX7-NEXT: buffer_wbinvl1_vol 9331; GFX7-NEXT: v_mov_b32_e32 v0, s4 9332; GFX7-NEXT: v_mov_b32_e32 v1, s5 9333; GFX7-NEXT: flat_store_dword v[0:1], v2 9334; GFX7-NEXT: s_endpgm 9335; 9336; GFX10-WGP-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: 9337; GFX10-WGP: ; %bb.0: ; %entry 9338; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 9339; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9340; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 9341; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 9342; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 9343; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9344; GFX10-WGP-NEXT: s_mov_b32 s6, s4 9345; GFX10-WGP-NEXT: s_mov_b32 s7, s5 9346; GFX10-WGP-NEXT: s_mov_b32 s11, s12 9347; GFX10-WGP-NEXT: s_mov_b32 s10, s13 9348; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 9349; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 9350; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9351; GFX10-WGP-NEXT: s_mov_b32 s7, s10 9352; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 9353; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 9354; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9355; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 9356; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 9357; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 9358; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9359; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9360; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9361; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9362; GFX10-WGP-NEXT: 
buffer_gl1_inv 9363; GFX10-WGP-NEXT: buffer_gl0_inv 9364; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 9365; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 9366; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 9367; GFX10-WGP-NEXT: s_endpgm 9368; 9369; GFX10-CU-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: 9370; GFX10-CU: ; %bb.0: ; %entry 9371; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 9372; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9373; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 9374; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 9375; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 9376; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 9377; GFX10-CU-NEXT: s_mov_b32 s6, s4 9378; GFX10-CU-NEXT: s_mov_b32 s7, s5 9379; GFX10-CU-NEXT: s_mov_b32 s11, s12 9380; GFX10-CU-NEXT: s_mov_b32 s10, s13 9381; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 9382; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 9383; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9384; GFX10-CU-NEXT: s_mov_b32 s7, s10 9385; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 9386; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 9387; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9388; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 9389; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 9390; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 9391; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9392; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 9393; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9394; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9395; GFX10-CU-NEXT: buffer_gl1_inv 9396; GFX10-CU-NEXT: buffer_gl0_inv 9397; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 9398; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 9399; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 9400; GFX10-CU-NEXT: s_endpgm 9401; 9402; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: 9403; SKIP-CACHE-INV: ; %bb.0: ; %entry 9404; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 9405; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 9406; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 9407; 
SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 9408; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 9409; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 9410; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 9411; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 9412; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 9413; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 9414; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 9415; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 9416; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 9417; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 9418; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 9419; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 9420; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9421; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 9422; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 9423; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 9424; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9425; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9426; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9427; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 9428; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 9429; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 9430; SKIP-CACHE-INV-NEXT: s_endpgm 9431; 9432; GFX90A-NOTTGSPLIT-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: 9433; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 9434; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9435; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9436; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9437; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9438; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9439; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 9440; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9441; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 9442; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 9443; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 9444; GFX90A-NOTTGSPLIT-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 9445; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 9446; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9447; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 9448; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 9449; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 9450; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 9451; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 9452; 9453; GFX90A-TGSPLIT-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: 9454; GFX90A-TGSPLIT: ; %bb.0: ; %entry 9455; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9456; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9457; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9458; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9459; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9460; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 9461; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9462; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 9463; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 9464; GFX90A-TGSPLIT-NEXT: buffer_wbl2 9465; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9466; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 9467; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9468; GFX90A-TGSPLIT-NEXT: buffer_invl2 9469; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 9470; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 9471; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 9472; GFX90A-TGSPLIT-NEXT: s_endpgm 9473; 9474; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: 9475; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 9476; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9477; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9478; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9479; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9480; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9481; GFX940-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v0, s2 9482; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9483; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 9484; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 9485; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 9486; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9487; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 9488; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9489; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 9490; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 9491; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 9492; GFX940-NOTTGSPLIT-NEXT: s_endpgm 9493; 9494; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: 9495; GFX940-TGSPLIT: ; %bb.0: ; %entry 9496; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9497; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9498; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9499; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9500; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9501; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 9502; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9503; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 9504; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 9505; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 9506; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9507; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 9508; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9509; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 9510; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 9511; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 9512; GFX940-TGSPLIT-NEXT: s_endpgm 9513; 9514; GFX11-WGP-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: 9515; GFX11-WGP: ; %bb.0: ; %entry 9516; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9517; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9518; GFX11-WGP-NEXT: s_load_b32 s2, 
s[4:5], 0xc 9519; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 9520; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 9521; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 9522; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9523; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 9524; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 9525; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 9526; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9527; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9528; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 9529; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9530; GFX11-WGP-NEXT: buffer_gl1_inv 9531; GFX11-WGP-NEXT: buffer_gl0_inv 9532; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 9533; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 9534; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 9535; GFX11-WGP-NEXT: s_endpgm 9536; 9537; GFX11-CU-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: 9538; GFX11-CU: ; %bb.0: ; %entry 9539; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9540; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9541; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9542; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9543; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 9544; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 9545; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9546; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 9547; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 9548; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 9549; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9550; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 9551; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 9552; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9553; GFX11-CU-NEXT: buffer_gl1_inv 9554; GFX11-CU-NEXT: buffer_gl0_inv 9555; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 9556; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 9557; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 9558; GFX11-CU-NEXT: s_endpgm 9559; 9560; GFX12-WGP-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: 9561; GFX12-WGP: ; %bb.0: ; %entry 9562; GFX12-WGP-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x0 9563; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9564; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9565; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9566; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 9567; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 9568; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9569; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 9570; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 9571; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 9572; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 9573; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9574; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9575; GFX12-WGP-NEXT: s_wait_storecnt 0x0 9576; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 9577; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9578; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9579; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9580; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 9581; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 9582; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 9583; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 9584; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 9585; GFX12-WGP-NEXT: s_endpgm 9586; 9587; GFX12-CU-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: 9588; GFX12-CU: ; %bb.0: ; %entry 9589; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9590; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9591; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9592; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9593; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 9594; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 9595; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9596; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 9597; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 9598; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 9599; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 9600; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9601; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9602; GFX12-CU-NEXT: s_wait_storecnt 0x0 9603; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 9604; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], 
v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9605; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9606; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9607; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 9608; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 9609; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 9610; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 9611; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 9612; GFX12-CU-NEXT: s_endpgm 9613 ptr %out, i32 %in, i32 %old) { 9614entry: 9615 %gep = getelementptr i32, ptr %out, i32 4 9616 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel acquire 9617 %val0 = extractvalue { i32, i1 } %val, 0 9618 store i32 %val0, ptr %out, align 4 9619 ret void 9620} 9621 ; Test case - volatile cmpxchg through the flat pointer %out advanced by 4 i32 elements (16 bytes), written with no syncscope (the function name calls this system scope), success ordering seq_cst and failure ordering acquire. The old value returned by the cmpxchg is then stored to %out. The per-target assertions that follow are autogenerated by utils/update_llc_test_checks.py - regenerate them with that script instead of editing by hand. 9622define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( 9623; GFX7-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: 9624; GFX7: ; %bb.0: ; %entry 9625; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 9626; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9627; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 9628; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 9629; GFX7-NEXT: s_mov_b64 s[12:13], 16 9630; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9631; GFX7-NEXT: s_mov_b32 s6, s4 9632; GFX7-NEXT: s_mov_b32 s7, s5 9633; GFX7-NEXT: s_mov_b32 s11, s12 9634; GFX7-NEXT: s_mov_b32 s10, s13 9635; GFX7-NEXT: s_add_u32 s6, s6, s11 9636; GFX7-NEXT: s_addc_u32 s10, s7, s10 9637; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9638; GFX7-NEXT: s_mov_b32 s7, s10 9639; GFX7-NEXT: v_mov_b32_e32 v2, s9 9640; GFX7-NEXT: v_mov_b32_e32 v0, s8 9641; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9642; GFX7-NEXT: v_mov_b32_e32 v3, v0 9643; GFX7-NEXT: v_mov_b32_e32 v0, s6 9644; GFX7-NEXT: v_mov_b32_e32 v1, s7 9645; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9646; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9647; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9648; GFX7-NEXT: buffer_wbinvl1_vol 9649; GFX7-NEXT: v_mov_b32_e32 v0, s4 9650; GFX7-NEXT: v_mov_b32_e32 v1, s5 9651; GFX7-NEXT: flat_store_dword v[0:1], v2 
9652; GFX7-NEXT: s_endpgm 9653; 9654; GFX10-WGP-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: 9655; GFX10-WGP: ; %bb.0: ; %entry 9656; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 9657; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9658; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 9659; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 9660; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 9661; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9662; GFX10-WGP-NEXT: s_mov_b32 s6, s4 9663; GFX10-WGP-NEXT: s_mov_b32 s7, s5 9664; GFX10-WGP-NEXT: s_mov_b32 s11, s12 9665; GFX10-WGP-NEXT: s_mov_b32 s10, s13 9666; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 9667; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 9668; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9669; GFX10-WGP-NEXT: s_mov_b32 s7, s10 9670; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 9671; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 9672; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9673; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 9674; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 9675; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 9676; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9677; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9678; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9679; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9680; GFX10-WGP-NEXT: buffer_gl1_inv 9681; GFX10-WGP-NEXT: buffer_gl0_inv 9682; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 9683; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 9684; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 9685; GFX10-WGP-NEXT: s_endpgm 9686; 9687; GFX10-CU-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: 9688; GFX10-CU: ; %bb.0: ; %entry 9689; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 9690; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9691; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 9692; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 9693; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 9694; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 9695; GFX10-CU-NEXT: s_mov_b32 s6, s4 9696; GFX10-CU-NEXT: 
s_mov_b32 s7, s5 9697; GFX10-CU-NEXT: s_mov_b32 s11, s12 9698; GFX10-CU-NEXT: s_mov_b32 s10, s13 9699; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 9700; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 9701; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9702; GFX10-CU-NEXT: s_mov_b32 s7, s10 9703; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 9704; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 9705; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9706; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 9707; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 9708; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 9709; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9710; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 9711; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9712; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9713; GFX10-CU-NEXT: buffer_gl1_inv 9714; GFX10-CU-NEXT: buffer_gl0_inv 9715; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 9716; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 9717; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 9718; GFX10-CU-NEXT: s_endpgm 9719; 9720; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: 9721; SKIP-CACHE-INV: ; %bb.0: ; %entry 9722; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 9723; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 9724; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 9725; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 9726; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 9727; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 9728; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 9729; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 9730; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 9731; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 9732; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 9733; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 9734; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 9735; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 9736; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 9737; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 9738; SKIP-CACHE-INV-NEXT: ; kill: 
def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9739; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 9740; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 9741; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 9742; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9743; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9744; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9745; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 9746; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 9747; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 9748; SKIP-CACHE-INV-NEXT: s_endpgm 9749; 9750; GFX90A-NOTTGSPLIT-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: 9751; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 9752; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9753; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 9754; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9755; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9756; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9757; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 9758; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9759; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 9760; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 9761; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 9762; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9763; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 9764; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9765; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 9766; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 9767; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 9768; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 9769; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 9770; 9771; GFX90A-TGSPLIT-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: 9772; GFX90A-TGSPLIT: ; %bb.0: ; %entry 9773; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 9774; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 
9775; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 9776; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9777; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 9778; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 9779; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9780; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 9781; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 9782; GFX90A-TGSPLIT-NEXT: buffer_wbl2 9783; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9784; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 9785; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9786; GFX90A-TGSPLIT-NEXT: buffer_invl2 9787; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 9788; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 9789; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 9790; GFX90A-TGSPLIT-NEXT: s_endpgm 9791; 9792; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: 9793; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 9794; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9795; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9796; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9797; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9798; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9799; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 9800; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9801; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 9802; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 9803; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 9804; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9805; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 9806; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9807; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 9808; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 9809; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 9810; 
GFX940-NOTTGSPLIT-NEXT: s_endpgm 9811; 9812; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: 9813; GFX940-TGSPLIT: ; %bb.0: ; %entry 9814; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 9815; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 9816; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 9817; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 9818; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 9819; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 9820; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9821; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 9822; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 9823; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 9824; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9825; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 9826; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 9827; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 9828; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 9829; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 9830; GFX940-TGSPLIT-NEXT: s_endpgm 9831; 9832; GFX11-WGP-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: 9833; GFX11-WGP: ; %bb.0: ; %entry 9834; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9835; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9836; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9837; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 9838; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 9839; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 9840; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9841; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 9842; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 9843; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 9844; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9845; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9846; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 9847; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9848; GFX11-WGP-NEXT: buffer_gl1_inv 9849; GFX11-WGP-NEXT: buffer_gl0_inv 
9850; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 9851; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 9852; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 9853; GFX11-WGP-NEXT: s_endpgm 9854; 9855; GFX11-CU-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: 9856; GFX11-CU: ; %bb.0: ; %entry 9857; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9858; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9859; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9860; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 9861; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 9862; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 9863; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9864; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 9865; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 9866; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 9867; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9868; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 9869; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 9870; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9871; GFX11-CU-NEXT: buffer_gl1_inv 9872; GFX11-CU-NEXT: buffer_gl0_inv 9873; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 9874; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 9875; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 9876; GFX11-CU-NEXT: s_endpgm 9877; 9878; GFX12-WGP-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: 9879; GFX12-WGP: ; %bb.0: ; %entry 9880; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9881; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 9882; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 9883; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 9884; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 9885; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 9886; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9887; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 9888; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 9889; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 9890; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 9891; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9892; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9893; GFX12-WGP-NEXT: s_wait_storecnt 0x0 9894; 
GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 9895; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9896; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 9897; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 9898; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 9899; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 9900; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 9901; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 9902; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 9903; GFX12-WGP-NEXT: s_endpgm 9904; 9905; GFX12-CU-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: 9906; GFX12-CU: ; %bb.0: ; %entry 9907; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 9908; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 9909; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 9910; GFX12-CU-NEXT: s_wait_kmcnt 0x0 9911; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 9912; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 9913; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9914; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 9915; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 9916; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 9917; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 9918; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9919; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9920; GFX12-CU-NEXT: s_wait_storecnt 0x0 9921; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 9922; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 9923; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 9924; GFX12-CU-NEXT: s_wait_samplecnt 0x0 9925; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 9926; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 9927; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 9928; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 9929; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 9930; GFX12-CU-NEXT: s_endpgm 9931 ptr %out, i32 %in, i32 %old) { 9932entry: 9933 %gep = getelementptr i32, ptr %out, i32 4 9934 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst acquire 9935 %val0 = extractvalue { i32, i1 } %val, 0 9936 store i32 %val0, ptr %out, align 4 9937 
ret void 9938} 9939 9940define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg( 9941; GFX7-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: 9942; GFX7: ; %bb.0: ; %entry 9943; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 9944; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9945; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 9946; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 9947; GFX7-NEXT: s_mov_b64 s[12:13], 16 9948; GFX7-NEXT: s_waitcnt lgkmcnt(0) 9949; GFX7-NEXT: s_mov_b32 s6, s4 9950; GFX7-NEXT: s_mov_b32 s7, s5 9951; GFX7-NEXT: s_mov_b32 s11, s12 9952; GFX7-NEXT: s_mov_b32 s10, s13 9953; GFX7-NEXT: s_add_u32 s6, s6, s11 9954; GFX7-NEXT: s_addc_u32 s10, s7, s10 9955; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9956; GFX7-NEXT: s_mov_b32 s7, s10 9957; GFX7-NEXT: v_mov_b32_e32 v2, s9 9958; GFX7-NEXT: v_mov_b32_e32 v0, s8 9959; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9960; GFX7-NEXT: v_mov_b32_e32 v3, v0 9961; GFX7-NEXT: v_mov_b32_e32 v0, s6 9962; GFX7-NEXT: v_mov_b32_e32 v1, s7 9963; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9964; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9965; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9966; GFX7-NEXT: buffer_wbinvl1_vol 9967; GFX7-NEXT: v_mov_b32_e32 v0, s4 9968; GFX7-NEXT: v_mov_b32_e32 v1, s5 9969; GFX7-NEXT: flat_store_dword v[0:1], v2 9970; GFX7-NEXT: s_endpgm 9971; 9972; GFX10-WGP-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: 9973; GFX10-WGP: ; %bb.0: ; %entry 9974; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 9975; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 9976; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 9977; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 9978; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 9979; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 9980; GFX10-WGP-NEXT: s_mov_b32 s6, s4 9981; GFX10-WGP-NEXT: s_mov_b32 s7, s5 9982; GFX10-WGP-NEXT: s_mov_b32 s11, s12 9983; GFX10-WGP-NEXT: s_mov_b32 s10, s13 9984; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 9985; 
GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 9986; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 9987; GFX10-WGP-NEXT: s_mov_b32 s7, s10 9988; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 9989; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 9990; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 9991; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 9992; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 9993; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 9994; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9995; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 9996; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 9997; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 9998; GFX10-WGP-NEXT: buffer_gl1_inv 9999; GFX10-WGP-NEXT: buffer_gl0_inv 10000; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 10001; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 10002; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 10003; GFX10-WGP-NEXT: s_endpgm 10004; 10005; GFX10-CU-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: 10006; GFX10-CU: ; %bb.0: ; %entry 10007; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 10008; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 10009; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 10010; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 10011; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 10012; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10013; GFX10-CU-NEXT: s_mov_b32 s6, s4 10014; GFX10-CU-NEXT: s_mov_b32 s7, s5 10015; GFX10-CU-NEXT: s_mov_b32 s11, s12 10016; GFX10-CU-NEXT: s_mov_b32 s10, s13 10017; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 10018; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 10019; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 10020; GFX10-CU-NEXT: s_mov_b32 s7, s10 10021; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 10022; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 10023; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10024; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 10025; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 10026; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 10027; GFX10-CU-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 10028; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 10029; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10030; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10031; GFX10-CU-NEXT: buffer_gl1_inv 10032; GFX10-CU-NEXT: buffer_gl0_inv 10033; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 10034; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 10035; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 10036; GFX10-CU-NEXT: s_endpgm 10037; 10038; SKIP-CACHE-INV-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: 10039; SKIP-CACHE-INV: ; %bb.0: ; %entry 10040; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 10041; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 10042; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 10043; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 10044; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 10045; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10046; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 10047; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 10048; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 10049; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 10050; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 10051; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 10052; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 10053; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 10054; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 10055; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 10056; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10057; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 10058; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 10059; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 10060; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10061; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10062; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10063; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 10064; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 10065; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 10066; SKIP-CACHE-INV-NEXT: s_endpgm 
10067; 10068; GFX90A-NOTTGSPLIT-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: 10069; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10070; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10071; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 10072; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 10073; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10074; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 10075; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 10076; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10077; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 10078; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 10079; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 10080; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10081; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 10082; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10083; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 10084; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 10085; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 10086; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 10087; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10088; 10089; GFX90A-TGSPLIT-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: 10090; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10091; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10092; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 10093; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 10094; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10095; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 10096; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 10097; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10098; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 10099; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 10100; GFX90A-TGSPLIT-NEXT: buffer_wbl2 10101; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10102; 
GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 10103; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10104; GFX90A-TGSPLIT-NEXT: buffer_invl2 10105; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 10106; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 10107; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 10108; GFX90A-TGSPLIT-NEXT: s_endpgm 10109; 10110; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: 10111; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10112; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 10113; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 10114; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 10115; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10116; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 10117; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 10118; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10119; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 10120; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 10121; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 10122; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10123; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 10124; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10125; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 10126; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 10127; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 10128; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10129; 10130; GFX940-TGSPLIT-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: 10131; GFX940-TGSPLIT: ; %bb.0: ; %entry 10132; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 10133; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 10134; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 10135; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10136; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 10137; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 10138; 
GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10139; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 10140; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 10141; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 10142; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10143; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 10144; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10145; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 10146; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 10147; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 10148; GFX940-TGSPLIT-NEXT: s_endpgm 10149; 10150; GFX11-WGP-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: 10151; GFX11-WGP: ; %bb.0: ; %entry 10152; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10153; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 10154; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 10155; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10156; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 10157; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 10158; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10159; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 10160; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 10161; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 10162; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10163; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10164; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 10165; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10166; GFX11-WGP-NEXT: buffer_gl1_inv 10167; GFX11-WGP-NEXT: buffer_gl0_inv 10168; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 10169; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 10170; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 10171; GFX11-WGP-NEXT: s_endpgm 10172; 10173; GFX11-CU-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: 10174; GFX11-CU: ; %bb.0: ; %entry 10175; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10176; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 10177; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 10178; 
GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10179; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 10180; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 10181; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10182; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 10183; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 10184; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 10185; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10186; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 10187; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 10188; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10189; GFX11-CU-NEXT: buffer_gl1_inv 10190; GFX11-CU-NEXT: buffer_gl0_inv 10191; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 10192; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 10193; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 10194; GFX11-CU-NEXT: s_endpgm 10195; 10196; GFX12-WGP-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: 10197; GFX12-WGP: ; %bb.0: ; %entry 10198; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10199; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 10200; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 10201; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10202; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 10203; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 10204; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10205; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 10206; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 10207; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 10208; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 10209; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 10210; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 10211; GFX12-WGP-NEXT: s_wait_storecnt 0x0 10212; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 10213; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 10214; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 10215; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 10216; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 10217; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 10218; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 10219; 
GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 10220; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 10221; GFX12-WGP-NEXT: s_endpgm 10222; 10223; GFX12-CU-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: 10224; GFX12-CU: ; %bb.0: ; %entry 10225; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10226; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 10227; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 10228; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10229; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 10230; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 10231; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10232; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 10233; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 10234; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 10235; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 10236; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 10237; GFX12-CU-NEXT: s_wait_samplecnt 0x0 10238; GFX12-CU-NEXT: s_wait_storecnt 0x0 10239; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 10240; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 10241; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 10242; GFX12-CU-NEXT: s_wait_samplecnt 0x0 10243; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 10244; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 10245; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 10246; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 10247; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 10248; GFX12-CU-NEXT: s_endpgm 10249 ptr %out, i32 %in, i32 %old) { 10250entry: 10251 %gep = getelementptr i32, ptr %out, i32 4 10252 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic seq_cst 10253 %val0 = extractvalue { i32, i1 } %val, 0 10254 store i32 %val0, ptr %out, align 4 10255 ret void 10256} 10257 10258define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg( 10259; GFX7-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: 10260; GFX7: ; %bb.0: ; %entry 10261; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 10262; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 10263; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 10264; 
GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 10265; GFX7-NEXT: s_mov_b64 s[12:13], 16 10266; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10267; GFX7-NEXT: s_mov_b32 s6, s4 10268; GFX7-NEXT: s_mov_b32 s7, s5 10269; GFX7-NEXT: s_mov_b32 s11, s12 10270; GFX7-NEXT: s_mov_b32 s10, s13 10271; GFX7-NEXT: s_add_u32 s6, s6, s11 10272; GFX7-NEXT: s_addc_u32 s10, s7, s10 10273; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 10274; GFX7-NEXT: s_mov_b32 s7, s10 10275; GFX7-NEXT: v_mov_b32_e32 v2, s9 10276; GFX7-NEXT: v_mov_b32_e32 v0, s8 10277; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10278; GFX7-NEXT: v_mov_b32_e32 v3, v0 10279; GFX7-NEXT: v_mov_b32_e32 v0, s6 10280; GFX7-NEXT: v_mov_b32_e32 v1, s7 10281; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10282; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10283; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10284; GFX7-NEXT: buffer_wbinvl1_vol 10285; GFX7-NEXT: v_mov_b32_e32 v0, s4 10286; GFX7-NEXT: v_mov_b32_e32 v1, s5 10287; GFX7-NEXT: flat_store_dword v[0:1], v2 10288; GFX7-NEXT: s_endpgm 10289; 10290; GFX10-WGP-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: 10291; GFX10-WGP: ; %bb.0: ; %entry 10292; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 10293; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 10294; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 10295; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 10296; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 10297; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10298; GFX10-WGP-NEXT: s_mov_b32 s6, s4 10299; GFX10-WGP-NEXT: s_mov_b32 s7, s5 10300; GFX10-WGP-NEXT: s_mov_b32 s11, s12 10301; GFX10-WGP-NEXT: s_mov_b32 s10, s13 10302; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 10303; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 10304; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 10305; GFX10-WGP-NEXT: s_mov_b32 s7, s10 10306; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 10307; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 10308; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def 
$vgpr2_vgpr3 killed $exec 10309; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 10310; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 10311; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 10312; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10313; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10314; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10315; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10316; GFX10-WGP-NEXT: buffer_gl1_inv 10317; GFX10-WGP-NEXT: buffer_gl0_inv 10318; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 10319; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 10320; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 10321; GFX10-WGP-NEXT: s_endpgm 10322; 10323; GFX10-CU-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: 10324; GFX10-CU: ; %bb.0: ; %entry 10325; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 10326; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 10327; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 10328; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 10329; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 10330; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10331; GFX10-CU-NEXT: s_mov_b32 s6, s4 10332; GFX10-CU-NEXT: s_mov_b32 s7, s5 10333; GFX10-CU-NEXT: s_mov_b32 s11, s12 10334; GFX10-CU-NEXT: s_mov_b32 s10, s13 10335; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 10336; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 10337; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 10338; GFX10-CU-NEXT: s_mov_b32 s7, s10 10339; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 10340; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 10341; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10342; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 10343; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 10344; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 10345; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10346; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 10347; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10348; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10349; GFX10-CU-NEXT: buffer_gl1_inv 10350; GFX10-CU-NEXT: buffer_gl0_inv 10351; 
GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 10352; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 10353; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 10354; GFX10-CU-NEXT: s_endpgm 10355; 10356; SKIP-CACHE-INV-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: 10357; SKIP-CACHE-INV: ; %bb.0: ; %entry 10358; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 10359; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 10360; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 10361; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 10362; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 10363; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10364; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 10365; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 10366; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 10367; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 10368; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 10369; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 10370; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 10371; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 10372; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 10373; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 10374; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10375; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 10376; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 10377; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 10378; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10379; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10380; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10381; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 10382; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 10383; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 10384; SKIP-CACHE-INV-NEXT: s_endpgm 10385; 10386; GFX90A-NOTTGSPLIT-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: 10387; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10388; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10389; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 10390; 
GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 10391; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10392; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 10393; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 10394; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10395; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 10396; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 10397; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 10398; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10399; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 10400; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10401; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 10402; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 10403; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 10404; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 10405; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10406; 10407; GFX90A-TGSPLIT-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: 10408; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10409; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10410; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 10411; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 10412; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10413; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 10414; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 10415; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10416; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 10417; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 10418; GFX90A-TGSPLIT-NEXT: buffer_wbl2 10419; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10420; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 10421; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10422; GFX90A-TGSPLIT-NEXT: buffer_invl2 10423; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 10424; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] 
op_sel:[0,1] 10425; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 10426; GFX90A-TGSPLIT-NEXT: s_endpgm 10427; 10428; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: 10429; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10430; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 10431; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 10432; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 10433; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10434; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 10435; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 10436; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10437; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 10438; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 10439; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 10440; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10441; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 10442; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10443; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 10444; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 10445; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 10446; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10447; 10448; GFX940-TGSPLIT-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: 10449; GFX940-TGSPLIT: ; %bb.0: ; %entry 10450; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 10451; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 10452; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 10453; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10454; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 10455; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 10456; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10457; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 10458; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 10459; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 10460; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 10461; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 10462; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10463; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 10464; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 10465; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 10466; GFX940-TGSPLIT-NEXT: s_endpgm 10467; 10468; GFX11-WGP-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: 10469; GFX11-WGP: ; %bb.0: ; %entry 10470; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10471; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 10472; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 10473; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10474; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 10475; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 10476; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10477; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 10478; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 10479; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 10480; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10481; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10482; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 10483; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10484; GFX11-WGP-NEXT: buffer_gl1_inv 10485; GFX11-WGP-NEXT: buffer_gl0_inv 10486; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 10487; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 10488; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 10489; GFX11-WGP-NEXT: s_endpgm 10490; 10491; GFX11-CU-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: 10492; GFX11-CU: ; %bb.0: ; %entry 10493; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10494; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 10495; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 10496; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10497; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 10498; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 10499; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10500; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 10501; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 
10502; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 10503; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10504; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 10505; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 10506; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10507; GFX11-CU-NEXT: buffer_gl1_inv 10508; GFX11-CU-NEXT: buffer_gl0_inv 10509; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 10510; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 10511; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 10512; GFX11-CU-NEXT: s_endpgm 10513; 10514; GFX12-WGP-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: 10515; GFX12-WGP: ; %bb.0: ; %entry 10516; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10517; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 10518; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 10519; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10520; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 10521; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 10522; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10523; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 10524; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 10525; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 10526; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 10527; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 10528; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 10529; GFX12-WGP-NEXT: s_wait_storecnt 0x0 10530; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 10531; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 10532; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 10533; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 10534; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 10535; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 10536; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 10537; GFX12-WGP-NEXT: s_endpgm 10538; 10539; GFX12-CU-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: 10540; GFX12-CU: ; %bb.0: ; %entry 10541; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10542; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 10543; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 10544; 
; Documentation for @flat_system_acquire_seq_cst_ret_cmpxchg, whose IR
; definition is completed on the following line.
;   Arguments: (ptr %out, i32 %in, i32 %old).
;   %gep addresses the i32 located 4 elements past %out (the targets that fold
;   the address expect it as offset:16 on the atomic).
;   A volatile cmpxchg on %gep compares against %old and swaps in %in, using
;   success ordering `acquire` and failure ordering `seq_cst`.
;   Field 0 of the { i32, i1 } cmpxchg result is extracted and stored to %out.
; The per-target assertions around this definition are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
; NOTE(review): for the GFX12 prefixes the assertions of this function expect
; only s_wait_loadcnt_dscnt 0x0 between the atomic and global_inv, whereas the
; monotonic/release seq_cst variants in this file also expect s_wait_bvhcnt and
; s_wait_samplecnt there. This mirrors llc output at generation time - confirm
; that the difference is intended.
GFX12-CU-NEXT: s_wait_kmcnt 0x0 10545; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 10546; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 10547; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10548; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 10549; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 10550; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 10551; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 10552; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 10553; GFX12-CU-NEXT: s_wait_samplecnt 0x0 10554; GFX12-CU-NEXT: s_wait_storecnt 0x0 10555; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 10556; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 10557; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 10558; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 10559; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 10560; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 10561; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 10562; GFX12-CU-NEXT: s_endpgm 10563 ptr %out, i32 %in, i32 %old) { 10564entry: 10565 %gep = getelementptr i32, ptr %out, i32 4 10566 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire seq_cst 10567 %val0 = extractvalue { i32, i1 } %val, 0 10568 store i32 %val0, ptr %out, align 4 10569 ret void 10570} 10571 10572define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg( 10573; GFX7-LABEL: flat_system_release_seq_cst_ret_cmpxchg: 10574; GFX7: ; %bb.0: ; %entry 10575; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 10576; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 10577; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 10578; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 10579; GFX7-NEXT: s_mov_b64 s[12:13], 16 10580; GFX7-NEXT: s_waitcnt lgkmcnt(0) 10581; GFX7-NEXT: s_mov_b32 s6, s4 10582; GFX7-NEXT: s_mov_b32 s7, s5 10583; GFX7-NEXT: s_mov_b32 s11, s12 10584; GFX7-NEXT: s_mov_b32 s10, s13 10585; GFX7-NEXT: s_add_u32 s6, s6, s11 10586; GFX7-NEXT: s_addc_u32 s10, s7, s10 10587; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 10588; GFX7-NEXT: s_mov_b32 s7, s10 10589; GFX7-NEXT: 
v_mov_b32_e32 v2, s9 10590; GFX7-NEXT: v_mov_b32_e32 v0, s8 10591; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10592; GFX7-NEXT: v_mov_b32_e32 v3, v0 10593; GFX7-NEXT: v_mov_b32_e32 v0, s6 10594; GFX7-NEXT: v_mov_b32_e32 v1, s7 10595; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10596; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10597; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10598; GFX7-NEXT: buffer_wbinvl1_vol 10599; GFX7-NEXT: v_mov_b32_e32 v0, s4 10600; GFX7-NEXT: v_mov_b32_e32 v1, s5 10601; GFX7-NEXT: flat_store_dword v[0:1], v2 10602; GFX7-NEXT: s_endpgm 10603; 10604; GFX10-WGP-LABEL: flat_system_release_seq_cst_ret_cmpxchg: 10605; GFX10-WGP: ; %bb.0: ; %entry 10606; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 10607; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 10608; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 10609; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 10610; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 10611; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 10612; GFX10-WGP-NEXT: s_mov_b32 s6, s4 10613; GFX10-WGP-NEXT: s_mov_b32 s7, s5 10614; GFX10-WGP-NEXT: s_mov_b32 s11, s12 10615; GFX10-WGP-NEXT: s_mov_b32 s10, s13 10616; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 10617; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 10618; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 10619; GFX10-WGP-NEXT: s_mov_b32 s7, s10 10620; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 10621; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 10622; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10623; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 10624; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 10625; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 10626; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10627; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10628; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10629; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10630; GFX10-WGP-NEXT: buffer_gl1_inv 10631; GFX10-WGP-NEXT: buffer_gl0_inv 10632; 
GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 10633; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 10634; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 10635; GFX10-WGP-NEXT: s_endpgm 10636; 10637; GFX10-CU-LABEL: flat_system_release_seq_cst_ret_cmpxchg: 10638; GFX10-CU: ; %bb.0: ; %entry 10639; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 10640; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 10641; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 10642; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 10643; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 10644; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 10645; GFX10-CU-NEXT: s_mov_b32 s6, s4 10646; GFX10-CU-NEXT: s_mov_b32 s7, s5 10647; GFX10-CU-NEXT: s_mov_b32 s11, s12 10648; GFX10-CU-NEXT: s_mov_b32 s10, s13 10649; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 10650; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 10651; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 10652; GFX10-CU-NEXT: s_mov_b32 s7, s10 10653; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 10654; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 10655; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10656; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 10657; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 10658; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 10659; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10660; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 10661; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10662; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10663; GFX10-CU-NEXT: buffer_gl1_inv 10664; GFX10-CU-NEXT: buffer_gl0_inv 10665; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 10666; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 10667; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 10668; GFX10-CU-NEXT: s_endpgm 10669; 10670; SKIP-CACHE-INV-LABEL: flat_system_release_seq_cst_ret_cmpxchg: 10671; SKIP-CACHE-INV: ; %bb.0: ; %entry 10672; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 10673; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 10674; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 10675; 
SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 10676; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 10677; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 10678; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 10679; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 10680; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 10681; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 10682; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 10683; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 10684; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 10685; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 10686; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 10687; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 10688; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10689; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 10690; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 10691; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 10692; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10693; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 10694; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10695; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 10696; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 10697; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 10698; SKIP-CACHE-INV-NEXT: s_endpgm 10699; 10700; GFX90A-NOTTGSPLIT-LABEL: flat_system_release_seq_cst_ret_cmpxchg: 10701; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 10702; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10703; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 10704; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 10705; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10706; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 10707; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 10708; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10709; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 10710; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 10711; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 
10712; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10713; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 10714; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10715; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 10716; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 10717; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 10718; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 10719; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 10720; 10721; GFX90A-TGSPLIT-LABEL: flat_system_release_seq_cst_ret_cmpxchg: 10722; GFX90A-TGSPLIT: ; %bb.0: ; %entry 10723; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 10724; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 10725; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 10726; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10727; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 10728; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 10729; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10730; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 10731; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 10732; GFX90A-TGSPLIT-NEXT: buffer_wbl2 10733; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10734; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 10735; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10736; GFX90A-TGSPLIT-NEXT: buffer_invl2 10737; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 10738; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 10739; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 10740; GFX90A-TGSPLIT-NEXT: s_endpgm 10741; 10742; GFX940-NOTTGSPLIT-LABEL: flat_system_release_seq_cst_ret_cmpxchg: 10743; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 10744; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 10745; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 10746; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 10747; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10748; 
GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 10749; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 10750; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10751; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 10752; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 10753; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 10754; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10755; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 10756; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10757; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 10758; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 10759; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 10760; GFX940-NOTTGSPLIT-NEXT: s_endpgm 10761; 10762; GFX940-TGSPLIT-LABEL: flat_system_release_seq_cst_ret_cmpxchg: 10763; GFX940-TGSPLIT: ; %bb.0: ; %entry 10764; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 10765; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 10766; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 10767; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 10768; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 10769; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 10770; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10771; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 10772; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 10773; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 10774; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10775; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 10776; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 10777; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 10778; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 10779; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 10780; GFX940-TGSPLIT-NEXT: s_endpgm 10781; 10782; GFX11-WGP-LABEL: flat_system_release_seq_cst_ret_cmpxchg: 10783; GFX11-WGP: ; %bb.0: ; %entry 10784; GFX11-WGP-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x0 10785; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 10786; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 10787; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 10788; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 10789; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 10790; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10791; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 10792; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 10793; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 10794; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10795; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 10796; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 10797; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10798; GFX11-WGP-NEXT: buffer_gl1_inv 10799; GFX11-WGP-NEXT: buffer_gl0_inv 10800; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 10801; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 10802; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 10803; GFX11-WGP-NEXT: s_endpgm 10804; 10805; GFX11-CU-LABEL: flat_system_release_seq_cst_ret_cmpxchg: 10806; GFX11-CU: ; %bb.0: ; %entry 10807; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10808; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 10809; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 10810; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 10811; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 10812; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 10813; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10814; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 10815; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 10816; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 10817; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10818; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 10819; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 10820; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 10821; GFX11-CU-NEXT: buffer_gl1_inv 10822; GFX11-CU-NEXT: buffer_gl0_inv 10823; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 10824; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 10825; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 
10826; GFX11-CU-NEXT: s_endpgm 10827; 10828; GFX12-WGP-LABEL: flat_system_release_seq_cst_ret_cmpxchg: 10829; GFX12-WGP: ; %bb.0: ; %entry 10830; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10831; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 10832; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 10833; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 10834; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 10835; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 10836; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10837; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 10838; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 10839; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 10840; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 10841; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 10842; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 10843; GFX12-WGP-NEXT: s_wait_storecnt 0x0 10844; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 10845; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 10846; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 10847; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 10848; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 10849; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 10850; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 10851; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 10852; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 10853; GFX12-WGP-NEXT: s_endpgm 10854; 10855; GFX12-CU-LABEL: flat_system_release_seq_cst_ret_cmpxchg: 10856; GFX12-CU: ; %bb.0: ; %entry 10857; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 10858; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 10859; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 10860; GFX12-CU-NEXT: s_wait_kmcnt 0x0 10861; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 10862; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 10863; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 10864; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 10865; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 10866; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 10867; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 10868; GFX12-CU-NEXT: 
s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
    ptr %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, ptr %out, i32 4
  %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release seq_cst
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr %out, align 4
  ret void
}

; Compare-and-swap through a generic (flat) pointer with no syncscope operand,
; using acq_rel ordering on success and seq_cst ordering on failure. The value
; read from memory is stored back through %out, so the result of the atomic is
; used. The assertion lines below are tool-generated (see the NOTE at the top
; of the file); regenerate them rather than editing by hand.
define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX7-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT: s_mov_b64 s[12:13], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, s4
; GFX7-NEXT: s_mov_b32 s7, s5
; GFX7-NEXT: s_mov_b32 s11, s12
; GFX7-NEXT: s_mov_b32 s10, s13
; GFX7-NEXT: s_add_u32 s6, s6, s11
; GFX7-NEXT: s_addc_u32 s10, s7, s10
; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT: s_mov_b32 s7, s10
; GFX7-NEXT: v_mov_b32_e32 v2, s9
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8
; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc
; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_mov_b32 s6, s4
; GFX10-WGP-NEXT: s_mov_b32 s7, s5
; GFX10-WGP-NEXT: s_mov_b32 s11, s12
; GFX10-WGP-NEXT: s_mov_b32 s10, s13
; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11
; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10
; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX10-WGP-NEXT: s_mov_b32 s7, s10
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8
; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8
; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc
; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_mov_b32 s6, s4
; GFX10-CU-NEXT: s_mov_b32 s7, s5
; GFX10-CU-NEXT: s_mov_b32 s11, s12
; GFX10-CU-NEXT: s_mov_b32 s10, s13
; GFX10-CU-NEXT: s_add_u32 s6, s6, s11
; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10
; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX10-CU-NEXT: s_mov_b32 s7, s10
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8
; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: flat_store_dword v[0:1], v2
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7
; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
    ptr %out, i32 %in, i32 %old) {
entry:
  ; i32 element index 4 from %out, i.e. a 16-byte displacement (the 16 that
  ; shows up in the expected assembly above).
  %gep = getelementptr i32, ptr %out, i32 4
  ; Volatile cmpxchg with no syncscope: compare against %old, swap in %in.
  %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel seq_cst
  ; Field 0 of the result pair is the loaded value; field 1 (the success flag)
  ; is not used.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr %out, align 4
  ret void
}

define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX7-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT: s_mov_b64 s[12:13], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, s4
; GFX7-NEXT: s_mov_b32 s7, s5
; GFX7-NEXT: s_mov_b32 s11, s12
; GFX7-NEXT: s_mov_b32 s10, s13
; GFX7-NEXT: s_add_u32 s6, s6, s11
; GFX7-NEXT: s_addc_u32 s10, s7, s10
; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT: s_mov_b32 s7, s10
; GFX7-NEXT: v_mov_b32_e32 v2, s9
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL:
flat_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8
; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc
; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_mov_b32 s6, s4
; GFX10-WGP-NEXT: s_mov_b32 s7, s5
; GFX10-WGP-NEXT: s_mov_b32 s11, s12
; GFX10-WGP-NEXT: s_mov_b32 s10, s13
; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11
; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10
; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX10-WGP-NEXT: s_mov_b32 s7, s10
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8
; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8
; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc
; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_mov_b32 s6, s4
; GFX10-CU-NEXT: s_mov_b32 s7, s5
; GFX10-CU-NEXT: s_mov_b32 s11, s12
; GFX10-CU-NEXT: s_mov_b32 s10, s13
; GFX10-CU-NEXT: s_add_u32 s6, s6, s11
; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10
; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX10-CU-NEXT: s_mov_b32 s7, s10
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8
; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: flat_store_dword v[0:1], v2
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7
; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
    ptr %out, i32 %in, i32 %old) {
entry:
  ; This kernel performs a compare-and-swap through a generic (flat) pointer
  ; with no syncscope operand, using seq_cst ordering for both the success and
  ; the failure case, and stores the value read from memory back through %out.
  ;
  ; i32 element index 4 from %out, i.e. a 16-byte displacement (the 16 that
  ; shows up in the expected assembly above).
  %gep = getelementptr i32, ptr %out, i32 4
  ; Volatile cmpxchg: compare against %old, swap in %in.
  %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst seq_cst
  ; Field 0 of the result pair is the loaded value; field 1 (the success flag)
  ; is not used.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr %out, align 4
  ret void
}

define amdgpu_kernel void @flat_system_one_as_unordered_load(
; GFX7-LABEL: flat_system_one_as_unordered_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: flat_load_dword v2, v[0:1]
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: flat_system_one_as_unordered_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1]
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: flat_system_one_as_unordered_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: flat_load_dword v2, v[0:1]
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
11566; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 11567; GFX10-CU-NEXT: s_endpgm 11568; 11569; SKIP-CACHE-INV-LABEL: flat_system_one_as_unordered_load: 11570; SKIP-CACHE-INV: ; %bb.0: ; %entry 11571; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11572; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 11573; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11574; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 11575; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 11576; SKIP-CACHE-INV-NEXT: flat_load_dword v2, v[0:1] 11577; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 11578; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 11579; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11580; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 11581; SKIP-CACHE-INV-NEXT: s_endpgm 11582; 11583; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_unordered_load: 11584; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11585; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11586; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11587; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11588; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 11589; GFX90A-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] 11590; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 11591; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11592; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 11593; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11594; 11595; GFX90A-TGSPLIT-LABEL: flat_system_one_as_unordered_load: 11596; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11597; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11598; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11599; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11600; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 11601; GFX90A-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] 11602; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 11603; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11604; 
GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 11605; GFX90A-TGSPLIT-NEXT: s_endpgm 11606; 11607; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_unordered_load: 11608; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11609; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11610; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11611; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11612; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 11613; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] 11614; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 11615; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11616; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 11617; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11618; 11619; GFX940-TGSPLIT-LABEL: flat_system_one_as_unordered_load: 11620; GFX940-TGSPLIT: ; %bb.0: ; %entry 11621; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11622; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11623; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11624; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 11625; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] 11626; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 11627; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11628; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 11629; GFX940-TGSPLIT-NEXT: s_endpgm 11630; 11631; GFX11-WGP-LABEL: flat_system_one_as_unordered_load: 11632; GFX11-WGP: ; %bb.0: ; %entry 11633; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11634; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11635; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11636; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 11637; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 11638; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] 11639; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 11640; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 11641; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11642; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 11643; GFX11-WGP-NEXT: s_endpgm 11644; 11645; GFX11-CU-LABEL: 
flat_system_one_as_unordered_load: 11646; GFX11-CU: ; %bb.0: ; %entry 11647; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11648; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11649; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 11650; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 11651; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 11652; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] 11653; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 11654; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 11655; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11656; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 11657; GFX11-CU-NEXT: s_endpgm 11658; 11659; GFX12-WGP-LABEL: flat_system_one_as_unordered_load: 11660; GFX12-WGP: ; %bb.0: ; %entry 11661; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11662; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11663; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11664; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 11665; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 11666; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] 11667; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 11668; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 11669; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 11670; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 11671; GFX12-WGP-NEXT: s_endpgm 11672; 11673; GFX12-CU-LABEL: flat_system_one_as_unordered_load: 11674; GFX12-CU: ; %bb.0: ; %entry 11675; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11676; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11677; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11678; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 11679; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 11680; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] 11681; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 11682; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 11683; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 11684; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 11685; GFX12-CU-NEXT: s_endpgm 11686 ptr %in, ptr %out) { 11687entry: 11688 %val = load atomic i32, ptr %in syncscope("one-as") unordered, align 4 11689 store i32 %val, ptr %out 11690 ret void 11691} 11692 11693define amdgpu_kernel void 
@flat_system_one_as_monotonic_load( 11694; GFX7-LABEL: flat_system_one_as_monotonic_load: 11695; GFX7: ; %bb.0: ; %entry 11696; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11697; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 11698; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11699; GFX7-NEXT: v_mov_b32_e32 v0, s6 11700; GFX7-NEXT: v_mov_b32_e32 v1, s7 11701; GFX7-NEXT: flat_load_dword v2, v[0:1] glc 11702; GFX7-NEXT: v_mov_b32_e32 v0, s4 11703; GFX7-NEXT: v_mov_b32_e32 v1, s5 11704; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11705; GFX7-NEXT: flat_store_dword v[0:1], v2 11706; GFX7-NEXT: s_endpgm 11707; 11708; GFX10-WGP-LABEL: flat_system_one_as_monotonic_load: 11709; GFX10-WGP: ; %bb.0: ; %entry 11710; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11711; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11712; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11713; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 11714; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 11715; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc 11716; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 11717; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 11718; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11719; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 11720; GFX10-WGP-NEXT: s_endpgm 11721; 11722; GFX10-CU-LABEL: flat_system_one_as_monotonic_load: 11723; GFX10-CU: ; %bb.0: ; %entry 11724; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11725; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11726; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11727; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 11728; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 11729; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc 11730; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 11731; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 11732; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11733; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 11734; GFX10-CU-NEXT: s_endpgm 11735; 11736; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_load: 11737; SKIP-CACHE-INV: ; %bb.0: ; %entry 11738; SKIP-CACHE-INV-NEXT: s_load_dwordx2 
s[2:3], s[4:5], 0x0 11739; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 11740; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11741; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 11742; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 11743; SKIP-CACHE-INV-NEXT: flat_load_dword v2, v[0:1] glc 11744; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 11745; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 11746; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11747; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 11748; SKIP-CACHE-INV-NEXT: s_endpgm 11749; 11750; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_load: 11751; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11752; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11753; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11754; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11755; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 11756; GFX90A-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc 11757; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 11758; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11759; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 11760; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11761; 11762; GFX90A-TGSPLIT-LABEL: flat_system_one_as_monotonic_load: 11763; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11764; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11765; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11766; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11767; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 11768; GFX90A-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc 11769; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 11770; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11771; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 11772; GFX90A-TGSPLIT-NEXT: s_endpgm 11773; 11774; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_load: 11775; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11776; 
GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11777; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11778; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11779; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 11780; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 11781; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 11782; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11783; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 11784; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11785; 11786; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_load: 11787; GFX940-TGSPLIT: ; %bb.0: ; %entry 11788; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11789; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11790; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11791; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 11792; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 11793; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 11794; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11795; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 11796; GFX940-TGSPLIT-NEXT: s_endpgm 11797; 11798; GFX11-WGP-LABEL: flat_system_one_as_monotonic_load: 11799; GFX11-WGP: ; %bb.0: ; %entry 11800; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11801; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11802; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11803; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 11804; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 11805; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc 11806; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 11807; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 11808; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11809; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 11810; GFX11-WGP-NEXT: s_endpgm 11811; 11812; GFX11-CU-LABEL: flat_system_one_as_monotonic_load: 11813; GFX11-CU: ; %bb.0: ; %entry 11814; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11815; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11816; GFX11-CU-NEXT: s_waitcnt 
lgkmcnt(0) 11817; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 11818; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 11819; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc 11820; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 11821; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 11822; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 11823; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 11824; GFX11-CU-NEXT: s_endpgm 11825; 11826; GFX12-WGP-LABEL: flat_system_one_as_monotonic_load: 11827; GFX12-WGP: ; %bb.0: ; %entry 11828; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11829; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11830; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 11831; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 11832; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 11833; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SYS 11834; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 11835; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 11836; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 11837; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 11838; GFX12-WGP-NEXT: s_endpgm 11839; 11840; GFX12-CU-LABEL: flat_system_one_as_monotonic_load: 11841; GFX12-CU: ; %bb.0: ; %entry 11842; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11843; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11844; GFX12-CU-NEXT: s_wait_kmcnt 0x0 11845; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 11846; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 11847; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SYS 11848; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 11849; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 11850; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 11851; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 11852; GFX12-CU-NEXT: s_endpgm 11853 ptr %in, ptr %out) { 11854entry: 11855 %val = load atomic i32, ptr %in syncscope("one-as") monotonic, align 4 11856 store i32 %val, ptr %out 11857 ret void 11858} 11859 11860define amdgpu_kernel void @flat_system_one_as_acquire_load( 11861; GFX7-LABEL: flat_system_one_as_acquire_load: 11862; GFX7: ; %bb.0: ; %entry 11863; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11864; GFX7-NEXT: s_load_dwordx2 
s[4:5], s[8:9], 0x2 11865; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11866; GFX7-NEXT: v_mov_b32_e32 v0, s6 11867; GFX7-NEXT: v_mov_b32_e32 v1, s7 11868; GFX7-NEXT: flat_load_dword v2, v[0:1] glc 11869; GFX7-NEXT: s_waitcnt vmcnt(0) 11870; GFX7-NEXT: buffer_wbinvl1_vol 11871; GFX7-NEXT: v_mov_b32_e32 v0, s4 11872; GFX7-NEXT: v_mov_b32_e32 v1, s5 11873; GFX7-NEXT: s_waitcnt lgkmcnt(0) 11874; GFX7-NEXT: flat_store_dword v[0:1], v2 11875; GFX7-NEXT: s_endpgm 11876; 11877; GFX10-WGP-LABEL: flat_system_one_as_acquire_load: 11878; GFX10-WGP: ; %bb.0: ; %entry 11879; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11880; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11881; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11882; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 11883; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 11884; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc 11885; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 11886; GFX10-WGP-NEXT: buffer_gl1_inv 11887; GFX10-WGP-NEXT: buffer_gl0_inv 11888; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 11889; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 11890; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 11891; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 11892; GFX10-WGP-NEXT: s_endpgm 11893; 11894; GFX10-CU-LABEL: flat_system_one_as_acquire_load: 11895; GFX10-CU: ; %bb.0: ; %entry 11896; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11897; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11898; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11899; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 11900; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 11901; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc 11902; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 11903; GFX10-CU-NEXT: buffer_gl1_inv 11904; GFX10-CU-NEXT: buffer_gl0_inv 11905; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 11906; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 11907; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 11908; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 11909; GFX10-CU-NEXT: s_endpgm 11910; 11911; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_load: 11912; 
SKIP-CACHE-INV: ; %bb.0: ; %entry 11913; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11914; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 11915; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11916; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 11917; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 11918; SKIP-CACHE-INV-NEXT: flat_load_dword v2, v[0:1] glc 11919; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 11920; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 11921; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 11922; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 11923; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 11924; SKIP-CACHE-INV-NEXT: s_endpgm 11925; 11926; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_load: 11927; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 11928; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11929; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11930; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11931; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 11932; GFX90A-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc 11933; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11934; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 11935; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 11936; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 11937; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11938; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 11939; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 11940; 11941; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acquire_load: 11942; GFX90A-TGSPLIT: ; %bb.0: ; %entry 11943; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 11944; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 11945; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11946; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 11947; GFX90A-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc 11948; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11949; GFX90A-TGSPLIT-NEXT: buffer_invl2 11950; 
GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 11951; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 11952; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 11953; GFX90A-TGSPLIT-NEXT: s_endpgm 11954; 11955; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_load: 11956; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 11957; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11958; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11959; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11960; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 11961; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 11962; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 11963; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 11964; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 11965; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11966; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 11967; GFX940-NOTTGSPLIT-NEXT: s_endpgm 11968; 11969; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_load: 11970; GFX940-TGSPLIT: ; %bb.0: ; %entry 11971; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 11972; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 11973; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 11974; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 11975; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 11976; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 11977; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 11978; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 11979; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 11980; GFX940-TGSPLIT-NEXT: s_endpgm 11981; 11982; GFX11-WGP-LABEL: flat_system_one_as_acquire_load: 11983; GFX11-WGP: ; %bb.0: ; %entry 11984; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 11985; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 11986; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11987; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 11988; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 11989; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] 
glc 11990; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 11991; GFX11-WGP-NEXT: buffer_gl1_inv 11992; GFX11-WGP-NEXT: buffer_gl0_inv 11993; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 11994; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 11995; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 11996; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 11997; GFX11-WGP-NEXT: s_endpgm 11998; 11999; GFX11-CU-LABEL: flat_system_one_as_acquire_load: 12000; GFX11-CU: ; %bb.0: ; %entry 12001; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 12002; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12003; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12004; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 12005; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 12006; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc 12007; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12008; GFX11-CU-NEXT: buffer_gl1_inv 12009; GFX11-CU-NEXT: buffer_gl0_inv 12010; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 12011; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 12012; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12013; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 12014; GFX11-CU-NEXT: s_endpgm 12015; 12016; GFX12-WGP-LABEL: flat_system_one_as_acquire_load: 12017; GFX12-WGP: ; %bb.0: ; %entry 12018; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 12019; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12020; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12021; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 12022; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 12023; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SYS 12024; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12025; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 12026; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 12027; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 12028; GFX12-WGP-NEXT: s_wait_dscnt 0x0 12029; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 12030; GFX12-WGP-NEXT: s_endpgm 12031; 12032; GFX12-CU-LABEL: flat_system_one_as_acquire_load: 12033; GFX12-CU: ; %bb.0: ; %entry 12034; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 12035; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12036; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12037; 
GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 12038; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 12039; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SYS 12040; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12041; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 12042; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 12043; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 12044; GFX12-CU-NEXT: s_wait_dscnt 0x0 12045; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 12046; GFX12-CU-NEXT: s_endpgm 12047 ptr %in, ptr %out) { 12048entry: 12049 %val = load atomic i32, ptr %in syncscope("one-as") acquire, align 4 12050 store i32 %val, ptr %out 12051 ret void 12052} 12053 12054define amdgpu_kernel void @flat_system_one_as_seq_cst_load( 12055; GFX7-LABEL: flat_system_one_as_seq_cst_load: 12056; GFX7: ; %bb.0: ; %entry 12057; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12058; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x2 12059; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12060; GFX7-NEXT: v_mov_b32_e32 v0, s6 12061; GFX7-NEXT: v_mov_b32_e32 v1, s7 12062; GFX7-NEXT: s_waitcnt vmcnt(0) 12063; GFX7-NEXT: flat_load_dword v2, v[0:1] glc 12064; GFX7-NEXT: s_waitcnt vmcnt(0) 12065; GFX7-NEXT: buffer_wbinvl1_vol 12066; GFX7-NEXT: v_mov_b32_e32 v0, s4 12067; GFX7-NEXT: v_mov_b32_e32 v1, s5 12068; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12069; GFX7-NEXT: flat_store_dword v[0:1], v2 12070; GFX7-NEXT: s_endpgm 12071; 12072; GFX10-WGP-LABEL: flat_system_one_as_seq_cst_load: 12073; GFX10-WGP: ; %bb.0: ; %entry 12074; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12075; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12076; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12077; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 12078; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 12079; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12080; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12081; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc 12082; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12083; GFX10-WGP-NEXT: buffer_gl1_inv 12084; GFX10-WGP-NEXT: buffer_gl0_inv 12085; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 12086; 
GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 12087; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12088; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 12089; GFX10-WGP-NEXT: s_endpgm 12090; 12091; GFX10-CU-LABEL: flat_system_one_as_seq_cst_load: 12092; GFX10-CU: ; %bb.0: ; %entry 12093; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12094; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12095; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12096; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 12097; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 12098; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12099; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 12100; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc 12101; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12102; GFX10-CU-NEXT: buffer_gl1_inv 12103; GFX10-CU-NEXT: buffer_gl0_inv 12104; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 12105; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 12106; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12107; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 12108; GFX10-CU-NEXT: s_endpgm 12109; 12110; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_load: 12111; SKIP-CACHE-INV: ; %bb.0: ; %entry 12112; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 12113; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 12114; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12115; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 12116; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 12117; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12118; SKIP-CACHE-INV-NEXT: flat_load_dword v2, v[0:1] glc 12119; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12120; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 12121; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 12122; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12123; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 12124; SKIP-CACHE-INV-NEXT: s_endpgm 12125; 12126; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_load: 12127; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12128; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12129; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 
12130; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12131; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 12132; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12133; GFX90A-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc 12134; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12135; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 12136; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 12137; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 12138; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12139; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 12140; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12141; 12142; GFX90A-TGSPLIT-LABEL: flat_system_one_as_seq_cst_load: 12143; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12144; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12145; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 12146; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12147; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 12148; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12149; GFX90A-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc 12150; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12151; GFX90A-TGSPLIT-NEXT: buffer_invl2 12152; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 12153; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 12154; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 12155; GFX90A-TGSPLIT-NEXT: s_endpgm 12156; 12157; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_load: 12158; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12159; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 12160; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 12161; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12162; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 12163; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12164; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 12165; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12166; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 12167; GFX940-NOTTGSPLIT-NEXT: 
v_mov_b64_e32 v[0:1], s[0:1] 12168; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12169; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 12170; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12171; 12172; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_load: 12173; GFX940-TGSPLIT: ; %bb.0: ; %entry 12174; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 12175; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 12176; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12177; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 12178; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12179; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 12180; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12181; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 12182; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 12183; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 12184; GFX940-TGSPLIT-NEXT: s_endpgm 12185; 12186; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_load: 12187; GFX11-WGP: ; %bb.0: ; %entry 12188; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 12189; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12190; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12191; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 12192; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 12193; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12194; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12195; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc 12196; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12197; GFX11-WGP-NEXT: buffer_gl1_inv 12198; GFX11-WGP-NEXT: buffer_gl0_inv 12199; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 12200; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 12201; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12202; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 12203; GFX11-WGP-NEXT: s_endpgm 12204; 12205; GFX11-CU-LABEL: flat_system_one_as_seq_cst_load: 12206; GFX11-CU: ; %bb.0: ; %entry 12207; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 12208; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12209; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12210; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 
12211; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 12212; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12213; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 12214; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc 12215; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12216; GFX11-CU-NEXT: buffer_gl1_inv 12217; GFX11-CU-NEXT: buffer_gl0_inv 12218; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 12219; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 12220; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12221; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 12222; GFX11-CU-NEXT: s_endpgm 12223; 12224; GFX12-WGP-LABEL: flat_system_one_as_seq_cst_load: 12225; GFX12-WGP: ; %bb.0: ; %entry 12226; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 12227; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12228; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12229; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 12230; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 12231; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 12232; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 12233; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12234; GFX12-WGP-NEXT: s_wait_storecnt 0x0 12235; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SYS 12236; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 12237; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 12238; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12239; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 12240; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 12241; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 12242; GFX12-WGP-NEXT: s_wait_dscnt 0x0 12243; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 12244; GFX12-WGP-NEXT: s_endpgm 12245; 12246; GFX12-CU-LABEL: flat_system_one_as_seq_cst_load: 12247; GFX12-CU: ; %bb.0: ; %entry 12248; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 12249; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 12250; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12251; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 12252; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 12253; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 12254; GFX12-CU-NEXT: s_wait_samplecnt 0x0 12255; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12256; GFX12-CU-NEXT: s_wait_storecnt 0x0 12257; GFX12-CU-NEXT: flat_load_b32 v2, 
v[0:1] scope:SCOPE_SYS 12258; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 12259; GFX12-CU-NEXT: s_wait_samplecnt 0x0 12260; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12261; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 12262; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 12263; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 12264; GFX12-CU-NEXT: s_wait_dscnt 0x0 12265; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 12266; GFX12-CU-NEXT: s_endpgm 12267 ptr %in, ptr %out) { 12268entry: 12269 %val = load atomic i32, ptr %in syncscope("one-as") seq_cst, align 4 12270 store i32 %val, ptr %out 12271 ret void 12272} 12273; Kernel under test: atomic i32 store of %in to %out, ordering unordered, syncscope("one-as"). The per-target blocks below are autogenerated expected llc -O0 output; regenerate them with the update script rather than editing by hand. 12274define amdgpu_kernel void @flat_system_one_as_unordered_store( 12275; GFX7-LABEL: flat_system_one_as_unordered_store: 12276; GFX7: ; %bb.0: ; %entry 12277; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 12278; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 12279; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12280; GFX7-NEXT: v_mov_b32_e32 v0, s6 12281; GFX7-NEXT: v_mov_b32_e32 v1, s7 12282; GFX7-NEXT: v_mov_b32_e32 v2, s4 12283; GFX7-NEXT: flat_store_dword v[0:1], v2 12284; GFX7-NEXT: s_endpgm 12285; 12286; GFX10-WGP-LABEL: flat_system_one_as_unordered_store: 12287; GFX10-WGP: ; %bb.0: ; %entry 12288; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0 12289; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12290; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12291; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 12292; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 12293; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 12294; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 12295; GFX10-WGP-NEXT: s_endpgm 12296; 12297; GFX10-CU-LABEL: flat_system_one_as_unordered_store: 12298; GFX10-CU: ; %bb.0: ; %entry 12299; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0 12300; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12301; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12302; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 12303; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 12304; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 12305; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 12306; GFX10-CU-NEXT: s_endpgm 12307; 
12308; SKIP-CACHE-INV-LABEL: flat_system_one_as_unordered_store: 12309; SKIP-CACHE-INV: ; %bb.0: ; %entry 12310; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0 12311; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 12312; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12313; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 12314; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 12315; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 12316; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 12317; SKIP-CACHE-INV-NEXT: s_endpgm 12318; 12319; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_unordered_store: 12320; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12321; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 12322; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12323; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12324; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 12325; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 12326; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 12327; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12328; 12329; GFX90A-TGSPLIT-LABEL: flat_system_one_as_unordered_store: 12330; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12331; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 12332; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12333; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12334; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 12335; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 12336; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 12337; GFX90A-TGSPLIT-NEXT: s_endpgm 12338; 12339; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_unordered_store: 12340; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12341; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 12342; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 12343; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12344; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 12345; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 12346; GFX940-NOTTGSPLIT-NEXT: 
flat_store_dword v[0:1], v2 sc0 sc1 12347; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12348; 12349; GFX940-TGSPLIT-LABEL: flat_system_one_as_unordered_store: 12350; GFX940-TGSPLIT: ; %bb.0: ; %entry 12351; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 12352; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 12353; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12354; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 12355; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 12356; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 12357; GFX940-TGSPLIT-NEXT: s_endpgm 12358; 12359; GFX11-WGP-LABEL: flat_system_one_as_unordered_store: 12360; GFX11-WGP: ; %bb.0: ; %entry 12361; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 12362; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12363; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12364; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 12365; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 12366; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 12367; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 12368; GFX11-WGP-NEXT: s_endpgm 12369; 12370; GFX11-CU-LABEL: flat_system_one_as_unordered_store: 12371; GFX11-CU: ; %bb.0: ; %entry 12372; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 12373; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12374; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12375; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 12376; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 12377; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 12378; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 12379; GFX11-CU-NEXT: s_endpgm 12380; 12381; GFX12-WGP-LABEL: flat_system_one_as_unordered_store: 12382; GFX12-WGP: ; %bb.0: ; %entry 12383; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 12384; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12385; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12386; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 12387; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 12388; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 12389; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 12390; GFX12-WGP-NEXT: s_endpgm 12391; 12392; GFX12-CU-LABEL: 
flat_system_one_as_unordered_store: 12393; GFX12-CU: ; %bb.0: ; %entry 12394; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 12395; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12396; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12397; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 12398; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 12399; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 12400; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 12401; GFX12-CU-NEXT: s_endpgm 12402 i32 %in, ptr %out) { 12403entry: 12404 store atomic i32 %in, ptr %out syncscope("one-as") unordered, align 4 12405 ret void 12406} 12407; Kernel under test: atomic i32 store of %in to %out, ordering monotonic, syncscope("one-as"). The per-target blocks below are autogenerated expected llc -O0 output; regenerate them with the update script rather than editing by hand. 12408define amdgpu_kernel void @flat_system_one_as_monotonic_store( 12409; GFX7-LABEL: flat_system_one_as_monotonic_store: 12410; GFX7: ; %bb.0: ; %entry 12411; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 12412; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 12413; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12414; GFX7-NEXT: v_mov_b32_e32 v0, s6 12415; GFX7-NEXT: v_mov_b32_e32 v1, s7 12416; GFX7-NEXT: v_mov_b32_e32 v2, s4 12417; GFX7-NEXT: flat_store_dword v[0:1], v2 12418; GFX7-NEXT: s_endpgm 12419; 12420; GFX10-WGP-LABEL: flat_system_one_as_monotonic_store: 12421; GFX10-WGP: ; %bb.0: ; %entry 12422; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0 12423; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12424; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12425; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 12426; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 12427; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 12428; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 12429; GFX10-WGP-NEXT: s_endpgm 12430; 12431; GFX10-CU-LABEL: flat_system_one_as_monotonic_store: 12432; GFX10-CU: ; %bb.0: ; %entry 12433; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0 12434; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12435; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12436; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 12437; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 12438; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 12439; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 12440; GFX10-CU-NEXT: s_endpgm 12441; 12442; 
SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_store: 12443; SKIP-CACHE-INV: ; %bb.0: ; %entry 12444; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0 12445; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 12446; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12447; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 12448; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 12449; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 12450; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 12451; SKIP-CACHE-INV-NEXT: s_endpgm 12452; 12453; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_store: 12454; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12455; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 12456; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12457; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12458; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 12459; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 12460; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 12461; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12462; 12463; GFX90A-TGSPLIT-LABEL: flat_system_one_as_monotonic_store: 12464; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12465; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 12466; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12467; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12468; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 12469; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 12470; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 12471; GFX90A-TGSPLIT-NEXT: s_endpgm 12472; 12473; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_store: 12474; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12475; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 12476; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 12477; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12478; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 12479; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 12480; GFX940-NOTTGSPLIT-NEXT: flat_store_dword 
v[0:1], v2 sc0 sc1 12481; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12482; 12483; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_store: 12484; GFX940-TGSPLIT: ; %bb.0: ; %entry 12485; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 12486; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 12487; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12488; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 12489; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 12490; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 12491; GFX940-TGSPLIT-NEXT: s_endpgm 12492; 12493; GFX11-WGP-LABEL: flat_system_one_as_monotonic_store: 12494; GFX11-WGP: ; %bb.0: ; %entry 12495; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 12496; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12497; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12498; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 12499; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 12500; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 12501; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 12502; GFX11-WGP-NEXT: s_endpgm 12503; 12504; GFX11-CU-LABEL: flat_system_one_as_monotonic_store: 12505; GFX11-CU: ; %bb.0: ; %entry 12506; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 12507; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12508; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12509; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 12510; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 12511; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 12512; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 12513; GFX11-CU-NEXT: s_endpgm 12514; 12515; GFX12-WGP-LABEL: flat_system_one_as_monotonic_store: 12516; GFX12-WGP: ; %bb.0: ; %entry 12517; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 12518; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12519; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12520; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 12521; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 12522; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 12523; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS 12524; GFX12-WGP-NEXT: s_endpgm 12525; 12526; GFX12-CU-LABEL: 
flat_system_one_as_monotonic_store: 12527; GFX12-CU: ; %bb.0: ; %entry 12528; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 12529; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12530; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12531; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 12532; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 12533; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 12534; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS 12535; GFX12-CU-NEXT: s_endpgm 12536 i32 %in, ptr %out) { 12537entry: 12538 store atomic i32 %in, ptr %out syncscope("one-as") monotonic, align 4 12539 ret void 12540} 12541; Kernel under test: atomic i32 store of %in to %out, ordering release, syncscope("one-as"). The per-target blocks below are autogenerated expected llc -O0 output; regenerate them with the update script rather than editing by hand. 12542define amdgpu_kernel void @flat_system_one_as_release_store( 12543; GFX7-LABEL: flat_system_one_as_release_store: 12544; GFX7: ; %bb.0: ; %entry 12545; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 12546; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 12547; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12548; GFX7-NEXT: v_mov_b32_e32 v0, s6 12549; GFX7-NEXT: v_mov_b32_e32 v1, s7 12550; GFX7-NEXT: v_mov_b32_e32 v2, s4 12551; GFX7-NEXT: s_waitcnt vmcnt(0) 12552; GFX7-NEXT: flat_store_dword v[0:1], v2 12553; GFX7-NEXT: s_endpgm 12554; 12555; GFX10-WGP-LABEL: flat_system_one_as_release_store: 12556; GFX10-WGP: ; %bb.0: ; %entry 12557; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0 12558; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12559; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12560; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 12561; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 12562; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 12563; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12564; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12565; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 12566; GFX10-WGP-NEXT: s_endpgm 12567; 12568; GFX10-CU-LABEL: flat_system_one_as_release_store: 12569; GFX10-CU: ; %bb.0: ; %entry 12570; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0 12571; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12572; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12573; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 12574; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 12575; 
GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 12576; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12577; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 12578; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 12579; GFX10-CU-NEXT: s_endpgm 12580; 12581; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_store: 12582; SKIP-CACHE-INV: ; %bb.0: ; %entry 12583; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0 12584; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 12585; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12586; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 12587; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 12588; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 12589; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12590; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 12591; SKIP-CACHE-INV-NEXT: s_endpgm 12592; 12593; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_release_store: 12594; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12595; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 12596; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12597; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12598; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 12599; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 12600; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 12601; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12602; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 12603; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12604; 12605; GFX90A-TGSPLIT-LABEL: flat_system_one_as_release_store: 12606; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12607; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 12608; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12609; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12610; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 12611; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 12612; GFX90A-TGSPLIT-NEXT: buffer_wbl2 12613; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12614; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 12615; GFX90A-TGSPLIT-NEXT: s_endpgm 12616; 12617; 
GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_store: 12618; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12619; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 12620; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 12621; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12622; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 12623; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 12624; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 12625; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12626; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 12627; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12628; 12629; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_store: 12630; GFX940-TGSPLIT: ; %bb.0: ; %entry 12631; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 12632; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 12633; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12634; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 12635; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 12636; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 12637; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12638; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 12639; GFX940-TGSPLIT-NEXT: s_endpgm 12640; 12641; GFX11-WGP-LABEL: flat_system_one_as_release_store: 12642; GFX11-WGP: ; %bb.0: ; %entry 12643; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 12644; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12645; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12646; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 12647; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 12648; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 12649; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12650; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12651; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 12652; GFX11-WGP-NEXT: s_endpgm 12653; 12654; GFX11-CU-LABEL: flat_system_one_as_release_store: 12655; GFX11-CU: ; %bb.0: ; %entry 12656; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 12657; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12658; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12659; 
GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 12660; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 12661; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 12662; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12663; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 12664; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 12665; GFX11-CU-NEXT: s_endpgm 12666; 12667; GFX12-WGP-LABEL: flat_system_one_as_release_store: 12668; GFX12-WGP: ; %bb.0: ; %entry 12669; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 12670; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12671; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12672; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 12673; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 12674; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 12675; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 12676; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 12677; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 12678; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12679; GFX12-WGP-NEXT: s_wait_storecnt 0x0 12680; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS 12681; GFX12-WGP-NEXT: s_endpgm 12682; 12683; GFX12-CU-LABEL: flat_system_one_as_release_store: 12684; GFX12-CU: ; %bb.0: ; %entry 12685; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 12686; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12687; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12688; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 12689; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 12690; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 12691; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 12692; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 12693; GFX12-CU-NEXT: s_wait_samplecnt 0x0 12694; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12695; GFX12-CU-NEXT: s_wait_storecnt 0x0 12696; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS 12697; GFX12-CU-NEXT: s_endpgm 12698 i32 %in, ptr %out) { 12699entry: 12700 store atomic i32 %in, ptr %out syncscope("one-as") release, align 4 12701 ret void 12702} 12703; Kernel under test: atomic i32 store of %in to %out, ordering seq_cst, syncscope("one-as"). The per-target blocks below are autogenerated expected llc -O0 output; regenerate them with the update script rather than editing by hand. 12704define amdgpu_kernel void @flat_system_one_as_seq_cst_store( 12705; GFX7-LABEL: flat_system_one_as_seq_cst_store: 12706; GFX7: ; %bb.0: ; %entry 12707; GFX7-NEXT: s_load_dword s4, s[8:9], 0x0 
12708; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x2 12709; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12710; GFX7-NEXT: v_mov_b32_e32 v0, s6 12711; GFX7-NEXT: v_mov_b32_e32 v1, s7 12712; GFX7-NEXT: v_mov_b32_e32 v2, s4 12713; GFX7-NEXT: s_waitcnt vmcnt(0) 12714; GFX7-NEXT: flat_store_dword v[0:1], v2 12715; GFX7-NEXT: s_endpgm 12716; 12717; GFX10-WGP-LABEL: flat_system_one_as_seq_cst_store: 12718; GFX10-WGP: ; %bb.0: ; %entry 12719; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x0 12720; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12721; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12722; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 12723; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 12724; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 12725; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 12726; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12727; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 12728; GFX10-WGP-NEXT: s_endpgm 12729; 12730; GFX10-CU-LABEL: flat_system_one_as_seq_cst_store: 12731; GFX10-CU: ; %bb.0: ; %entry 12732; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x0 12733; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12734; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12735; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 12736; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 12737; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 12738; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 12739; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 12740; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 12741; GFX10-CU-NEXT: s_endpgm 12742; 12743; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_store: 12744; SKIP-CACHE-INV: ; %bb.0: ; %entry 12745; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x0 12746; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 12747; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12748; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 12749; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 12750; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 12751; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 12752; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 12753; SKIP-CACHE-INV-NEXT: s_endpgm 
12754; 12755; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_store: 12756; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12757; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 12758; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12759; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12760; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 12761; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 12762; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 12763; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12764; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 12765; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12766; 12767; GFX90A-TGSPLIT-LABEL: flat_system_one_as_seq_cst_store: 12768; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12769; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x0 12770; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8 12771; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12772; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 12773; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 12774; GFX90A-TGSPLIT-NEXT: buffer_wbl2 12775; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12776; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 12777; GFX90A-TGSPLIT-NEXT: s_endpgm 12778; 12779; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_store: 12780; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12781; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 12782; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 12783; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12784; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 12785; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 12786; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 12787; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 12788; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 12789; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12790; 12791; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_store: 12792; GFX940-TGSPLIT: ; %bb.0: ; %entry 12793; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 12794; 
GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 12795; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12796; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 12797; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 12798; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 12799; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 12800; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 12801; GFX940-TGSPLIT-NEXT: s_endpgm 12802; 12803; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_store: 12804; GFX11-WGP: ; %bb.0: ; %entry 12805; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 12806; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12807; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12808; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 12809; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 12810; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 12811; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 12812; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 12813; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 12814; GFX11-WGP-NEXT: s_endpgm 12815; 12816; GFX11-CU-LABEL: flat_system_one_as_seq_cst_store: 12817; GFX11-CU: ; %bb.0: ; %entry 12818; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 12819; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12820; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12821; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 12822; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 12823; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 12824; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 12825; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 12826; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 12827; GFX11-CU-NEXT: s_endpgm 12828; 12829; GFX12-WGP-LABEL: flat_system_one_as_seq_cst_store: 12830; GFX12-WGP: ; %bb.0: ; %entry 12831; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x0 12832; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12833; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12834; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 12835; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 12836; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 12837; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 12838; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 12839; GFX12-WGP-NEXT: 
s_wait_samplecnt 0x0 12840; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 12841; GFX12-WGP-NEXT: s_wait_storecnt 0x0 12842; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS 12843; GFX12-WGP-NEXT: s_endpgm 12844; 12845; GFX12-CU-LABEL: flat_system_one_as_seq_cst_store: 12846; GFX12-CU: ; %bb.0: ; %entry 12847; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x0 12848; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 12849; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12850; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 12851; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 12852; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 12853; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 12854; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 12855; GFX12-CU-NEXT: s_wait_samplecnt 0x0 12856; GFX12-CU-NEXT: s_wait_loadcnt 0x0 12857; GFX12-CU-NEXT: s_wait_storecnt 0x0 12858; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS 12859; GFX12-CU-NEXT: s_endpgm 12860 i32 %in, ptr %out) { 12861entry: 12862 store atomic i32 %in, ptr %out syncscope("one-as") seq_cst, align 4 12863 ret void 12864} 12865; Kernel under test: volatile atomicrmw xchg of %in into %out, ordering monotonic, syncscope("one-as"); the returned %val is never used. The per-target blocks below are autogenerated expected llc -O0 output; regenerate them with the update script rather than editing by hand. 12866define amdgpu_kernel void @flat_system_one_as_monotonic_atomicrmw( 12867; GFX7-LABEL: flat_system_one_as_monotonic_atomicrmw: 12868; GFX7: ; %bb.0: ; %entry 12869; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12870; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 12871; GFX7-NEXT: s_waitcnt lgkmcnt(0) 12872; GFX7-NEXT: v_mov_b32_e32 v0, s6 12873; GFX7-NEXT: v_mov_b32_e32 v1, s7 12874; GFX7-NEXT: v_mov_b32_e32 v2, s4 12875; GFX7-NEXT: flat_atomic_swap v[0:1], v2 12876; GFX7-NEXT: s_endpgm 12877; 12878; GFX10-WGP-LABEL: flat_system_one_as_monotonic_atomicrmw: 12879; GFX10-WGP: ; %bb.0: ; %entry 12880; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12881; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x8 12882; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 12883; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 12884; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 12885; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 12886; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 12887; GFX10-WGP-NEXT: s_endpgm 12888; 
12889; GFX10-CU-LABEL: flat_system_one_as_monotonic_atomicrmw: 12890; GFX10-CU: ; %bb.0: ; %entry 12891; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12892; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x8 12893; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 12894; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 12895; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 12896; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 12897; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 12898; GFX10-CU-NEXT: s_endpgm 12899; 12900; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_atomicrmw: 12901; SKIP-CACHE-INV: ; %bb.0: ; %entry 12902; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 12903; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x2 12904; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 12905; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 12906; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 12907; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 12908; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 12909; SKIP-CACHE-INV-NEXT: s_endpgm 12910; 12911; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_atomicrmw: 12912; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 12913; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12914; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 12915; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12916; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 12917; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 12918; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 12919; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 12920; 12921; GFX90A-TGSPLIT-LABEL: flat_system_one_as_monotonic_atomicrmw: 12922; GFX90A-TGSPLIT: ; %bb.0: ; %entry 12923; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 12924; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 12925; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12926; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 12927; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 12928; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 12929; 
GFX90A-TGSPLIT-NEXT: s_endpgm 12930; 12931; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_atomicrmw: 12932; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 12933; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 12934; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 12935; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12936; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 12937; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 12938; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 12939; GFX940-NOTTGSPLIT-NEXT: s_endpgm 12940; 12941; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_atomicrmw: 12942; GFX940-TGSPLIT: ; %bb.0: ; %entry 12943; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 12944; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 12945; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 12946; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 12947; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 12948; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 12949; GFX940-TGSPLIT-NEXT: s_endpgm 12950; 12951; GFX11-WGP-LABEL: flat_system_one_as_monotonic_atomicrmw: 12952; GFX11-WGP: ; %bb.0: ; %entry 12953; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 12954; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 12955; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 12956; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 12957; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 12958; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 12959; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 12960; GFX11-WGP-NEXT: s_endpgm 12961; 12962; GFX11-CU-LABEL: flat_system_one_as_monotonic_atomicrmw: 12963; GFX11-CU: ; %bb.0: ; %entry 12964; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 12965; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 12966; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 12967; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 12968; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 12969; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 12970; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 12971; GFX11-CU-NEXT: s_endpgm 12972; 12973; 
GFX12-WGP-LABEL: flat_system_one_as_monotonic_atomicrmw: 12974; GFX12-WGP: ; %bb.0: ; %entry 12975; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 12976; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 12977; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 12978; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 12979; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 12980; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 12981; GFX12-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 12982; GFX12-WGP-NEXT: s_endpgm 12983; 12984; GFX12-CU-LABEL: flat_system_one_as_monotonic_atomicrmw: 12985; GFX12-CU: ; %bb.0: ; %entry 12986; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 12987; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 12988; GFX12-CU-NEXT: s_wait_kmcnt 0x0 12989; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 12990; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 12991; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 12992; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 12993; GFX12-CU-NEXT: s_endpgm 12994 ptr %out, i32 %in) { 12995entry: 12996 %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") monotonic 12997 ret void 12998} 12999 13000define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw( 13001; GFX7-LABEL: flat_system_one_as_acquire_atomicrmw: 13002; GFX7: ; %bb.0: ; %entry 13003; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13004; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 13005; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13006; GFX7-NEXT: v_mov_b32_e32 v0, s6 13007; GFX7-NEXT: v_mov_b32_e32 v1, s7 13008; GFX7-NEXT: v_mov_b32_e32 v2, s4 13009; GFX7-NEXT: flat_atomic_swap v[0:1], v2 13010; GFX7-NEXT: s_waitcnt vmcnt(0) 13011; GFX7-NEXT: buffer_wbinvl1_vol 13012; GFX7-NEXT: s_endpgm 13013; 13014; GFX10-WGP-LABEL: flat_system_one_as_acquire_atomicrmw: 13015; GFX10-WGP: ; %bb.0: ; %entry 13016; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13017; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x8 13018; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13019; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 13020; GFX10-WGP-NEXT: 
v_mov_b32_e32 v1, s7 13021; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 13022; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 13023; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13024; GFX10-WGP-NEXT: buffer_gl1_inv 13025; GFX10-WGP-NEXT: buffer_gl0_inv 13026; GFX10-WGP-NEXT: s_endpgm 13027; 13028; GFX10-CU-LABEL: flat_system_one_as_acquire_atomicrmw: 13029; GFX10-CU: ; %bb.0: ; %entry 13030; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13031; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x8 13032; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13033; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 13034; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 13035; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 13036; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 13037; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13038; GFX10-CU-NEXT: buffer_gl1_inv 13039; GFX10-CU-NEXT: buffer_gl0_inv 13040; GFX10-CU-NEXT: s_endpgm 13041; 13042; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_atomicrmw: 13043; SKIP-CACHE-INV: ; %bb.0: ; %entry 13044; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 13045; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x2 13046; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13047; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 13048; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 13049; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 13050; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 13051; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13052; SKIP-CACHE-INV-NEXT: s_endpgm 13053; 13054; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_atomicrmw: 13055; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13056; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13057; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 13058; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13059; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 13060; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 13061; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 13062; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13063; 
GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 13064; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 13065; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13066; 13067; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acquire_atomicrmw: 13068; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13069; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13070; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 13071; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13072; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 13073; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 13074; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 13075; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13076; GFX90A-TGSPLIT-NEXT: buffer_invl2 13077; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 13078; GFX90A-TGSPLIT-NEXT: s_endpgm 13079; 13080; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_atomicrmw: 13081; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13082; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 13083; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 13084; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13085; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 13086; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 13087; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 13088; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13089; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 13090; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13091; 13092; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_atomicrmw: 13093; GFX940-TGSPLIT: ; %bb.0: ; %entry 13094; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 13095; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 13096; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13097; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 13098; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 13099; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 13100; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13101; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 13102; GFX940-TGSPLIT-NEXT: s_endpgm 13103; 13104; GFX11-WGP-LABEL: 
flat_system_one_as_acquire_atomicrmw: 13105; GFX11-WGP: ; %bb.0: ; %entry 13106; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13107; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 13108; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13109; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 13110; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 13111; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 13112; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 13113; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13114; GFX11-WGP-NEXT: buffer_gl1_inv 13115; GFX11-WGP-NEXT: buffer_gl0_inv 13116; GFX11-WGP-NEXT: s_endpgm 13117; 13118; GFX11-CU-LABEL: flat_system_one_as_acquire_atomicrmw: 13119; GFX11-CU: ; %bb.0: ; %entry 13120; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13121; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 13122; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13123; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 13124; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 13125; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 13126; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 13127; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13128; GFX11-CU-NEXT: buffer_gl1_inv 13129; GFX11-CU-NEXT: buffer_gl0_inv 13130; GFX11-CU-NEXT: s_endpgm 13131; 13132; GFX12-WGP-LABEL: flat_system_one_as_acquire_atomicrmw: 13133; GFX12-WGP: ; %bb.0: ; %entry 13134; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13135; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 13136; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13137; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 13138; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 13139; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 13140; GFX12-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 13141; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13142; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 13143; GFX12-WGP-NEXT: s_endpgm 13144; 13145; GFX12-CU-LABEL: flat_system_one_as_acquire_atomicrmw: 13146; GFX12-CU: ; %bb.0: ; %entry 13147; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13148; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 13149; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13150; GFX12-CU-NEXT: 
v_mov_b32_e32 v0, s2 13151; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 13152; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 13153; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 13154; GFX12-CU-NEXT: s_wait_storecnt 0x0 13155; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 13156; GFX12-CU-NEXT: s_endpgm 13157 ptr %out, i32 %in) { 13158entry: 13159 %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acquire 13160 ret void 13161} 13162 13163define amdgpu_kernel void @flat_system_one_as_release_atomicrmw( 13164; GFX7-LABEL: flat_system_one_as_release_atomicrmw: 13165; GFX7: ; %bb.0: ; %entry 13166; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13167; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 13168; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13169; GFX7-NEXT: v_mov_b32_e32 v0, s6 13170; GFX7-NEXT: v_mov_b32_e32 v1, s7 13171; GFX7-NEXT: v_mov_b32_e32 v2, s4 13172; GFX7-NEXT: s_waitcnt vmcnt(0) 13173; GFX7-NEXT: flat_atomic_swap v[0:1], v2 13174; GFX7-NEXT: s_endpgm 13175; 13176; GFX10-WGP-LABEL: flat_system_one_as_release_atomicrmw: 13177; GFX10-WGP: ; %bb.0: ; %entry 13178; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13179; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x8 13180; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13181; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 13182; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 13183; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 13184; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13185; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13186; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 13187; GFX10-WGP-NEXT: s_endpgm 13188; 13189; GFX10-CU-LABEL: flat_system_one_as_release_atomicrmw: 13190; GFX10-CU: ; %bb.0: ; %entry 13191; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13192; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x8 13193; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13194; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 13195; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 13196; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 13197; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 13198; GFX10-CU-NEXT: s_waitcnt_vscnt 
null, 0x0 13199; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 13200; GFX10-CU-NEXT: s_endpgm 13201; 13202; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_atomicrmw: 13203; SKIP-CACHE-INV: ; %bb.0: ; %entry 13204; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 13205; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x2 13206; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13207; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 13208; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 13209; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 13210; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13211; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 13212; SKIP-CACHE-INV-NEXT: s_endpgm 13213; 13214; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_release_atomicrmw: 13215; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13216; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13217; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 13218; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13219; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 13220; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 13221; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 13222; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13223; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 13224; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13225; 13226; GFX90A-TGSPLIT-LABEL: flat_system_one_as_release_atomicrmw: 13227; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13228; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13229; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 13230; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13231; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 13232; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 13233; GFX90A-TGSPLIT-NEXT: buffer_wbl2 13234; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13235; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 13236; GFX90A-TGSPLIT-NEXT: s_endpgm 13237; 13238; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_atomicrmw: 13239; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 
13240; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 13241; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 13242; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13243; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 13244; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 13245; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 13246; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13247; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 13248; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13249; 13250; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_atomicrmw: 13251; GFX940-TGSPLIT: ; %bb.0: ; %entry 13252; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 13253; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 13254; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13255; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 13256; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 13257; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 13258; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13259; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 13260; GFX940-TGSPLIT-NEXT: s_endpgm 13261; 13262; GFX11-WGP-LABEL: flat_system_one_as_release_atomicrmw: 13263; GFX11-WGP: ; %bb.0: ; %entry 13264; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13265; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 13266; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13267; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 13268; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 13269; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 13270; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 13271; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13272; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 13273; GFX11-WGP-NEXT: s_endpgm 13274; 13275; GFX11-CU-LABEL: flat_system_one_as_release_atomicrmw: 13276; GFX11-CU: ; %bb.0: ; %entry 13277; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13278; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 13279; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13280; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 13281; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 13282; GFX11-CU-NEXT: 
v_mov_b32_e32 v2, s0 13283; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 13284; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13285; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 13286; GFX11-CU-NEXT: s_endpgm 13287; 13288; GFX12-WGP-LABEL: flat_system_one_as_release_atomicrmw: 13289; GFX12-WGP: ; %bb.0: ; %entry 13290; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13291; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 13292; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13293; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 13294; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 13295; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 13296; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 13297; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 13298; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 13299; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 13300; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13301; GFX12-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 13302; GFX12-WGP-NEXT: s_endpgm 13303; 13304; GFX12-CU-LABEL: flat_system_one_as_release_atomicrmw: 13305; GFX12-CU: ; %bb.0: ; %entry 13306; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13307; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 13308; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13309; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 13310; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 13311; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 13312; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 13313; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 13314; GFX12-CU-NEXT: s_wait_samplecnt 0x0 13315; GFX12-CU-NEXT: s_wait_loadcnt 0x0 13316; GFX12-CU-NEXT: s_wait_storecnt 0x0 13317; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 13318; GFX12-CU-NEXT: s_endpgm 13319 ptr %out, i32 %in) { 13320entry: 13321 %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") release 13322 ret void 13323} 13324 13325define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw( 13326; GFX7-LABEL: flat_system_one_as_acq_rel_atomicrmw: 13327; GFX7: ; %bb.0: ; %entry 13328; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13329; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 
13330; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13331; GFX7-NEXT: v_mov_b32_e32 v0, s6 13332; GFX7-NEXT: v_mov_b32_e32 v1, s7 13333; GFX7-NEXT: v_mov_b32_e32 v2, s4 13334; GFX7-NEXT: s_waitcnt vmcnt(0) 13335; GFX7-NEXT: flat_atomic_swap v[0:1], v2 13336; GFX7-NEXT: s_waitcnt vmcnt(0) 13337; GFX7-NEXT: buffer_wbinvl1_vol 13338; GFX7-NEXT: s_endpgm 13339; 13340; GFX10-WGP-LABEL: flat_system_one_as_acq_rel_atomicrmw: 13341; GFX10-WGP: ; %bb.0: ; %entry 13342; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13343; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x8 13344; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13345; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 13346; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 13347; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 13348; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13349; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13350; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 13351; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13352; GFX10-WGP-NEXT: buffer_gl1_inv 13353; GFX10-WGP-NEXT: buffer_gl0_inv 13354; GFX10-WGP-NEXT: s_endpgm 13355; 13356; GFX10-CU-LABEL: flat_system_one_as_acq_rel_atomicrmw: 13357; GFX10-CU: ; %bb.0: ; %entry 13358; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13359; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x8 13360; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13361; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 13362; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 13363; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 13364; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 13365; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13366; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 13367; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13368; GFX10-CU-NEXT: buffer_gl1_inv 13369; GFX10-CU-NEXT: buffer_gl0_inv 13370; GFX10-CU-NEXT: s_endpgm 13371; 13372; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_atomicrmw: 13373; SKIP-CACHE-INV: ; %bb.0: ; %entry 13374; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 13375; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x2 13376; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13377; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 13378; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 13379; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 13380; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13381; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 13382; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13383; SKIP-CACHE-INV-NEXT: s_endpgm 13384; 13385; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_atomicrmw: 13386; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13387; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13388; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 13389; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13390; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 13391; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 13392; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 13393; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13394; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 13395; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13396; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 13397; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 13398; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13399; 13400; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acq_rel_atomicrmw: 13401; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13402; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13403; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 13404; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13405; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 13406; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 13407; GFX90A-TGSPLIT-NEXT: buffer_wbl2 13408; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13409; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 13410; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13411; GFX90A-TGSPLIT-NEXT: buffer_invl2 13412; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 13413; GFX90A-TGSPLIT-NEXT: s_endpgm 13414; 13415; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_atomicrmw: 13416; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13417; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], 
s[4:5], 0x0 13418; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 13419; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13420; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 13421; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 13422; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 13423; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13424; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 13425; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13426; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 13427; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13428; 13429; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_atomicrmw: 13430; GFX940-TGSPLIT: ; %bb.0: ; %entry 13431; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 13432; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 13433; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13434; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 13435; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 13436; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 13437; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13438; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 13439; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13440; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 13441; GFX940-TGSPLIT-NEXT: s_endpgm 13442; 13443; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_atomicrmw: 13444; GFX11-WGP: ; %bb.0: ; %entry 13445; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13446; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 13447; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13448; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 13449; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 13450; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 13451; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 13452; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13453; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 13454; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13455; GFX11-WGP-NEXT: buffer_gl1_inv 13456; GFX11-WGP-NEXT: buffer_gl0_inv 13457; GFX11-WGP-NEXT: s_endpgm 13458; 13459; GFX11-CU-LABEL: flat_system_one_as_acq_rel_atomicrmw: 13460; GFX11-CU: ; %bb.0: ; 
%entry 13461; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13462; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 13463; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13464; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 13465; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 13466; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 13467; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 13468; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13469; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 13470; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13471; GFX11-CU-NEXT: buffer_gl1_inv 13472; GFX11-CU-NEXT: buffer_gl0_inv 13473; GFX11-CU-NEXT: s_endpgm 13474; 13475; GFX12-WGP-LABEL: flat_system_one_as_acq_rel_atomicrmw: 13476; GFX12-WGP: ; %bb.0: ; %entry 13477; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13478; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 13479; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13480; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 13481; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 13482; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 13483; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 13484; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 13485; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 13486; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 13487; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13488; GFX12-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 13489; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13490; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 13491; GFX12-WGP-NEXT: s_endpgm 13492; 13493; GFX12-CU-LABEL: flat_system_one_as_acq_rel_atomicrmw: 13494; GFX12-CU: ; %bb.0: ; %entry 13495; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13496; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 13497; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13498; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 13499; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 13500; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 13501; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 13502; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 13503; GFX12-CU-NEXT: s_wait_samplecnt 0x0 13504; GFX12-CU-NEXT: s_wait_loadcnt 0x0 13505; GFX12-CU-NEXT: s_wait_storecnt 0x0 13506; GFX12-CU-NEXT: 
flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 13507; GFX12-CU-NEXT: s_wait_storecnt 0x0 13508; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 13509; GFX12-CU-NEXT: s_endpgm 13510 ptr %out, i32 %in) { 13511entry: 13512 %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acq_rel 13513 ret void 13514} 13515 13516define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw( 13517; GFX7-LABEL: flat_system_one_as_seq_cst_atomicrmw: 13518; GFX7: ; %bb.0: ; %entry 13519; GFX7-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13520; GFX7-NEXT: s_load_dword s4, s[8:9], 0x2 13521; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13522; GFX7-NEXT: v_mov_b32_e32 v0, s6 13523; GFX7-NEXT: v_mov_b32_e32 v1, s7 13524; GFX7-NEXT: v_mov_b32_e32 v2, s4 13525; GFX7-NEXT: s_waitcnt vmcnt(0) 13526; GFX7-NEXT: flat_atomic_swap v[0:1], v2 13527; GFX7-NEXT: s_waitcnt vmcnt(0) 13528; GFX7-NEXT: buffer_wbinvl1_vol 13529; GFX7-NEXT: s_endpgm 13530; 13531; GFX10-WGP-LABEL: flat_system_one_as_seq_cst_atomicrmw: 13532; GFX10-WGP: ; %bb.0: ; %entry 13533; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13534; GFX10-WGP-NEXT: s_load_dword s4, s[8:9], 0x8 13535; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13536; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 13537; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 13538; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s4 13539; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13540; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13541; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 13542; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13543; GFX10-WGP-NEXT: buffer_gl1_inv 13544; GFX10-WGP-NEXT: buffer_gl0_inv 13545; GFX10-WGP-NEXT: s_endpgm 13546; 13547; GFX10-CU-LABEL: flat_system_one_as_seq_cst_atomicrmw: 13548; GFX10-CU: ; %bb.0: ; %entry 13549; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13550; GFX10-CU-NEXT: s_load_dword s4, s[8:9], 0x8 13551; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13552; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 13553; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 13554; GFX10-CU-NEXT: v_mov_b32_e32 v2, s4 13555; 
GFX10-CU-NEXT: s_waitcnt vmcnt(0) 13556; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13557; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 13558; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13559; GFX10-CU-NEXT: buffer_gl1_inv 13560; GFX10-CU-NEXT: buffer_gl0_inv 13561; GFX10-CU-NEXT: s_endpgm 13562; 13563; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_atomicrmw: 13564; SKIP-CACHE-INV: ; %bb.0: ; %entry 13565; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 13566; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[4:5], 0x2 13567; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13568; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 13569; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 13570; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 13571; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13572; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 13573; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13574; SKIP-CACHE-INV-NEXT: s_endpgm 13575; 13576; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_atomicrmw: 13577; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13578; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13579; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 13580; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13581; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 13582; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 13583; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 13584; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13585; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 13586; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13587; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 13588; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 13589; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13590; 13591; GFX90A-TGSPLIT-LABEL: flat_system_one_as_seq_cst_atomicrmw: 13592; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13593; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 13594; GFX90A-TGSPLIT-NEXT: s_load_dword s4, s[8:9], 0x8 13595; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13596; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 
v[0:1], s[6:7], s[6:7] op_sel:[0,1] 13597; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 13598; GFX90A-TGSPLIT-NEXT: buffer_wbl2 13599; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13600; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 13601; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13602; GFX90A-TGSPLIT-NEXT: buffer_invl2 13603; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 13604; GFX90A-TGSPLIT-NEXT: s_endpgm 13605; 13606; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_atomicrmw: 13607; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13608; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 13609; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 13610; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13611; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 13612; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 13613; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 13614; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13615; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 13616; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13617; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 13618; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13619; 13620; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_atomicrmw: 13621; GFX940-TGSPLIT: ; %bb.0: ; %entry 13622; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 13623; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 13624; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13625; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] 13626; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 13627; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 13628; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13629; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 13630; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13631; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 13632; GFX940-TGSPLIT-NEXT: s_endpgm 13633; 13634; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_atomicrmw: 13635; GFX11-WGP: ; %bb.0: ; %entry 13636; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13637; GFX11-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 
13638; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13639; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 13640; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 13641; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 13642; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 13643; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13644; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 13645; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13646; GFX11-WGP-NEXT: buffer_gl1_inv 13647; GFX11-WGP-NEXT: buffer_gl0_inv 13648; GFX11-WGP-NEXT: s_endpgm 13649; 13650; GFX11-CU-LABEL: flat_system_one_as_seq_cst_atomicrmw: 13651; GFX11-CU: ; %bb.0: ; %entry 13652; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13653; GFX11-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 13654; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13655; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 13656; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 13657; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 13658; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 13659; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13660; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 13661; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 13662; GFX11-CU-NEXT: buffer_gl1_inv 13663; GFX11-CU-NEXT: buffer_gl0_inv 13664; GFX11-CU-NEXT: s_endpgm 13665; 13666; GFX12-WGP-LABEL: flat_system_one_as_seq_cst_atomicrmw: 13667; GFX12-WGP: ; %bb.0: ; %entry 13668; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13669; GFX12-WGP-NEXT: s_load_b32 s0, s[4:5], 0x8 13670; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13671; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 13672; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3 13673; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0 13674; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 13675; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 13676; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 13677; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 13678; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13679; GFX12-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 13680; GFX12-WGP-NEXT: s_wait_storecnt 0x0 13681; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 13682; GFX12-WGP-NEXT: s_endpgm 13683; 13684; GFX12-CU-LABEL: 
flat_system_one_as_seq_cst_atomicrmw: 13685; GFX12-CU: ; %bb.0: ; %entry 13686; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 13687; GFX12-CU-NEXT: s_load_b32 s0, s[4:5], 0x8 13688; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13689; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 13690; GFX12-CU-NEXT: v_mov_b32_e32 v1, s3 13691; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0 13692; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 13693; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 13694; GFX12-CU-NEXT: s_wait_samplecnt 0x0 13695; GFX12-CU-NEXT: s_wait_loadcnt 0x0 13696; GFX12-CU-NEXT: s_wait_storecnt 0x0 13697; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS 13698; GFX12-CU-NEXT: s_wait_storecnt 0x0 13699; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 13700; GFX12-CU-NEXT: s_endpgm 13701 ptr %out, i32 %in) { 13702entry: 13703 %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") seq_cst 13704 ret void 13705} 13706 13707define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw( 13708; GFX7-LABEL: flat_system_one_as_acquire_ret_atomicrmw: 13709; GFX7: ; %bb.0: ; %entry 13710; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13711; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 13712; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13713; GFX7-NEXT: v_mov_b32_e32 v0, s4 13714; GFX7-NEXT: v_mov_b32_e32 v1, s5 13715; GFX7-NEXT: v_mov_b32_e32 v2, s6 13716; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13717; GFX7-NEXT: s_waitcnt vmcnt(0) 13718; GFX7-NEXT: buffer_wbinvl1_vol 13719; GFX7-NEXT: v_mov_b32_e32 v0, s4 13720; GFX7-NEXT: v_mov_b32_e32 v1, s5 13721; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13722; GFX7-NEXT: flat_store_dword v[0:1], v2 13723; GFX7-NEXT: s_endpgm 13724; 13725; GFX10-WGP-LABEL: flat_system_one_as_acquire_ret_atomicrmw: 13726; GFX10-WGP: ; %bb.0: ; %entry 13727; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13728; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 13729; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13730; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 13731; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 13732; 
GFX10-WGP-NEXT: v_mov_b32_e32 v2, s6 13733; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13734; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13735; GFX10-WGP-NEXT: buffer_gl1_inv 13736; GFX10-WGP-NEXT: buffer_gl0_inv 13737; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 13738; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 13739; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13740; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 13741; GFX10-WGP-NEXT: s_endpgm 13742; 13743; GFX10-CU-LABEL: flat_system_one_as_acquire_ret_atomicrmw: 13744; GFX10-CU: ; %bb.0: ; %entry 13745; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13746; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 13747; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13748; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 13749; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 13750; GFX10-CU-NEXT: v_mov_b32_e32 v2, s6 13751; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13752; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 13753; GFX10-CU-NEXT: buffer_gl1_inv 13754; GFX10-CU-NEXT: buffer_gl0_inv 13755; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 13756; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 13757; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13758; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 13759; GFX10-CU-NEXT: s_endpgm 13760; 13761; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_ret_atomicrmw: 13762; SKIP-CACHE-INV: ; %bb.0: ; %entry 13763; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13764; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2 13765; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13766; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 13767; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 13768; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 13769; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13770; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13771; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 13772; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 13773; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13774; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 13775; SKIP-CACHE-INV-NEXT: s_endpgm 13776; 13777; 
GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_ret_atomicrmw: 13778; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13779; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13780; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 13781; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13782; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 13783; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s6 13784; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13785; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13786; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 13787; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 13788; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 13789; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13790; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 13791; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 13792; 13793; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acquire_ret_atomicrmw: 13794; GFX90A-TGSPLIT: ; %bb.0: ; %entry 13795; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13796; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 13797; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13798; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 13799; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s6 13800; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13801; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13802; GFX90A-TGSPLIT-NEXT: buffer_invl2 13803; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 13804; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 13805; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 13806; GFX90A-TGSPLIT-NEXT: s_endpgm 13807; 13808; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_ret_atomicrmw: 13809; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 13810; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13811; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 13812; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13813; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 
v[0:1], s[0:1] 13814; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 13815; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 13816; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13817; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 13818; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 13819; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13820; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 13821; GFX940-NOTTGSPLIT-NEXT: s_endpgm 13822; 13823; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_ret_atomicrmw: 13824; GFX940-TGSPLIT: ; %bb.0: ; %entry 13825; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13826; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 13827; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13828; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 13829; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 13830; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 13831; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 13832; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 13833; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 13834; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 13835; GFX940-TGSPLIT-NEXT: s_endpgm 13836; 13837; GFX11-WGP-LABEL: flat_system_one_as_acquire_ret_atomicrmw: 13838; GFX11-WGP: ; %bb.0: ; %entry 13839; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13840; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 13841; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13842; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 13843; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 13844; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 13845; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc 13846; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 13847; GFX11-WGP-NEXT: buffer_gl1_inv 13848; GFX11-WGP-NEXT: buffer_gl0_inv 13849; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 13850; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 13851; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 13852; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 13853; GFX11-WGP-NEXT: s_endpgm 13854; 13855; GFX11-CU-LABEL: 
flat_system_one_as_acquire_ret_atomicrmw: 13856; GFX11-CU: ; %bb.0: ; %entry 13857; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13858; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 13859; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13860; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 13861; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 13862; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 13863; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc 13864; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 13865; GFX11-CU-NEXT: buffer_gl1_inv 13866; GFX11-CU-NEXT: buffer_gl0_inv 13867; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 13868; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 13869; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 13870; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 13871; GFX11-CU-NEXT: s_endpgm 13872; 13873; GFX12-WGP-LABEL: flat_system_one_as_acquire_ret_atomicrmw: 13874; GFX12-WGP: ; %bb.0: ; %entry 13875; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13876; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 13877; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 13878; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 13879; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 13880; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s2 13881; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 13882; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 13883; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 13884; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 13885; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 13886; GFX12-WGP-NEXT: s_wait_dscnt 0x0 13887; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 13888; GFX12-WGP-NEXT: s_endpgm 13889; 13890; GFX12-CU-LABEL: flat_system_one_as_acquire_ret_atomicrmw: 13891; GFX12-CU: ; %bb.0: ; %entry 13892; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 13893; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 13894; GFX12-CU-NEXT: s_wait_kmcnt 0x0 13895; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 13896; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 13897; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2 13898; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 13899; 
GFX12-CU-NEXT: s_wait_loadcnt 0x0 13900; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 13901; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 13902; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 13903; GFX12-CU-NEXT: s_wait_dscnt 0x0 13904; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 13905; GFX12-CU-NEXT: s_endpgm 13906 ptr %out, i32 %in) { 13907entry: 13908 %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acquire 13909 store i32 %val, ptr %out, align 4 13910 ret void 13911} 13912 13913define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( 13914; GFX7-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: 13915; GFX7: ; %bb.0: ; %entry 13916; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13917; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 13918; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13919; GFX7-NEXT: v_mov_b32_e32 v0, s4 13920; GFX7-NEXT: v_mov_b32_e32 v1, s5 13921; GFX7-NEXT: v_mov_b32_e32 v2, s6 13922; GFX7-NEXT: s_waitcnt vmcnt(0) 13923; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13924; GFX7-NEXT: s_waitcnt vmcnt(0) 13925; GFX7-NEXT: buffer_wbinvl1_vol 13926; GFX7-NEXT: v_mov_b32_e32 v0, s4 13927; GFX7-NEXT: v_mov_b32_e32 v1, s5 13928; GFX7-NEXT: s_waitcnt lgkmcnt(0) 13929; GFX7-NEXT: flat_store_dword v[0:1], v2 13930; GFX7-NEXT: s_endpgm 13931; 13932; GFX10-WGP-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: 13933; GFX10-WGP: ; %bb.0: ; %entry 13934; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13935; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 13936; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13937; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 13938; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 13939; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s6 13940; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13941; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 13942; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13943; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 13944; GFX10-WGP-NEXT: buffer_gl1_inv 13945; GFX10-WGP-NEXT: buffer_gl0_inv 13946; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 13947; GFX10-WGP-NEXT: v_mov_b32_e32 
v1, s5 13948; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 13949; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 13950; GFX10-WGP-NEXT: s_endpgm 13951; 13952; GFX10-CU-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: 13953; GFX10-CU: ; %bb.0: ; %entry 13954; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 13955; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 13956; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13957; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 13958; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 13959; GFX10-CU-NEXT: v_mov_b32_e32 v2, s6 13960; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 13961; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 13962; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13963; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 13964; GFX10-CU-NEXT: buffer_gl1_inv 13965; GFX10-CU-NEXT: buffer_gl0_inv 13966; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 13967; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 13968; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 13969; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 13970; GFX10-CU-NEXT: s_endpgm 13971; 13972; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: 13973; SKIP-CACHE-INV: ; %bb.0: ; %entry 13974; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 13975; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2 13976; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13977; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 13978; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 13979; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 13980; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13981; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13982; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 13983; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 13984; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 13985; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 13986; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 13987; SKIP-CACHE-INV-NEXT: s_endpgm 13988; 13989; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: 13990; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 13991; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 
s[4:5], s[8:9], 0x0 13992; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 13993; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 13994; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 13995; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s6 13996; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 13997; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 13998; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 13999; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14000; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 14001; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 14002; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 14003; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14004; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 14005; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14006; 14007; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: 14008; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14009; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14010; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 14011; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14012; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 14013; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s6 14014; GFX90A-TGSPLIT-NEXT: buffer_wbl2 14015; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14016; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 14017; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14018; GFX90A-TGSPLIT-NEXT: buffer_invl2 14019; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 14020; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 14021; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 14022; GFX90A-TGSPLIT-NEXT: s_endpgm 14023; 14024; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: 14025; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14026; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14027; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 14028; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14029; 
GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 14030; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 14031; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 14032; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14033; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 14034; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14035; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 14036; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 14037; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14038; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 14039; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14040; 14041; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: 14042; GFX940-TGSPLIT: ; %bb.0: ; %entry 14043; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14044; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 14045; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14046; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 14047; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 14048; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 14049; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14050; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 14051; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14052; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 14053; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 14054; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 14055; GFX940-TGSPLIT-NEXT: s_endpgm 14056; 14057; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: 14058; GFX11-WGP: ; %bb.0: ; %entry 14059; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14060; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 14061; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14062; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 14063; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 14064; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 14065; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 14066; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14067; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc 14068; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 14069; 
GFX11-WGP-NEXT: buffer_gl1_inv 14070; GFX11-WGP-NEXT: buffer_gl0_inv 14071; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 14072; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 14073; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14074; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 14075; GFX11-WGP-NEXT: s_endpgm 14076; 14077; GFX11-CU-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: 14078; GFX11-CU: ; %bb.0: ; %entry 14079; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14080; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 14081; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14082; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 14083; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 14084; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 14085; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 14086; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 14087; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc 14088; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 14089; GFX11-CU-NEXT: buffer_gl1_inv 14090; GFX11-CU-NEXT: buffer_gl0_inv 14091; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 14092; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 14093; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14094; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 14095; GFX11-CU-NEXT: s_endpgm 14096; 14097; GFX12-WGP-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: 14098; GFX12-WGP: ; %bb.0: ; %entry 14099; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14100; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 14101; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14102; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 14103; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 14104; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s2 14105; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 14106; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 14107; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 14108; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 14109; GFX12-WGP-NEXT: s_wait_storecnt 0x0 14110; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 14111; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 14112; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 14113; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 14114; GFX12-WGP-NEXT: global_inv 
scope:SCOPE_SYS 14115; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 14116; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 14117; GFX12-WGP-NEXT: s_wait_dscnt 0x0 14118; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 14119; GFX12-WGP-NEXT: s_endpgm 14120; 14121; GFX12-CU-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: 14122; GFX12-CU: ; %bb.0: ; %entry 14123; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14124; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 14125; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14126; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 14127; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 14128; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2 14129; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 14130; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 14131; GFX12-CU-NEXT: s_wait_samplecnt 0x0 14132; GFX12-CU-NEXT: s_wait_loadcnt 0x0 14133; GFX12-CU-NEXT: s_wait_storecnt 0x0 14134; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 14135; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 14136; GFX12-CU-NEXT: s_wait_samplecnt 0x0 14137; GFX12-CU-NEXT: s_wait_loadcnt 0x0 14138; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 14139; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 14140; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 14141; GFX12-CU-NEXT: s_wait_dscnt 0x0 14142; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 14143; GFX12-CU-NEXT: s_endpgm 14144 ptr %out, i32 %in) { 14145entry: 14146 %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acq_rel 14147 store i32 %val, ptr %out, align 4 14148 ret void 14149} 14150 14151define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( 14152; GFX7-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: 14153; GFX7: ; %bb.0: ; %entry 14154; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14155; GFX7-NEXT: s_load_dword s6, s[8:9], 0x2 14156; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14157; GFX7-NEXT: v_mov_b32_e32 v0, s4 14158; GFX7-NEXT: v_mov_b32_e32 v1, s5 14159; GFX7-NEXT: v_mov_b32_e32 v2, s6 14160; GFX7-NEXT: s_waitcnt vmcnt(0) 14161; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 14162; 
GFX7-NEXT: s_waitcnt vmcnt(0) 14163; GFX7-NEXT: buffer_wbinvl1_vol 14164; GFX7-NEXT: v_mov_b32_e32 v0, s4 14165; GFX7-NEXT: v_mov_b32_e32 v1, s5 14166; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14167; GFX7-NEXT: flat_store_dword v[0:1], v2 14168; GFX7-NEXT: s_endpgm 14169; 14170; GFX10-WGP-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: 14171; GFX10-WGP: ; %bb.0: ; %entry 14172; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14173; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 14174; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14175; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 14176; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 14177; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s6 14178; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 14179; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14180; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 14181; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 14182; GFX10-WGP-NEXT: buffer_gl1_inv 14183; GFX10-WGP-NEXT: buffer_gl0_inv 14184; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 14185; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 14186; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14187; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 14188; GFX10-WGP-NEXT: s_endpgm 14189; 14190; GFX10-CU-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: 14191; GFX10-CU: ; %bb.0: ; %entry 14192; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14193; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 14194; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14195; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 14196; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 14197; GFX10-CU-NEXT: v_mov_b32_e32 v2, s6 14198; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 14199; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14200; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 14201; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 14202; GFX10-CU-NEXT: buffer_gl1_inv 14203; GFX10-CU-NEXT: buffer_gl0_inv 14204; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 14205; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 14206; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14207; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 14208; GFX10-CU-NEXT: s_endpgm 
14209; 14210; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: 14211; SKIP-CACHE-INV: ; %bb.0: ; %entry 14212; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14213; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[4:5], 0x2 14214; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14215; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 14216; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 14217; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 14218; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14219; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 14220; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14221; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 14222; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 14223; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14224; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 14225; SKIP-CACHE-INV-NEXT: s_endpgm 14226; 14227; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: 14228; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14229; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14230; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 14231; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14232; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 14233; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s6 14234; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 14235; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14236; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 14237; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14238; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 14239; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 14240; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 14241; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14242; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 14243; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14244; 14245; GFX90A-TGSPLIT-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: 14246; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14247; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14248; 
GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0x8 14249; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14250; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 14251; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s6 14252; GFX90A-TGSPLIT-NEXT: buffer_wbl2 14253; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14254; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 14255; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14256; GFX90A-TGSPLIT-NEXT: buffer_invl2 14257; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 14258; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 14259; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 14260; GFX90A-TGSPLIT-NEXT: s_endpgm 14261; 14262; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: 14263; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14264; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14265; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 14266; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14267; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 14268; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 14269; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 14270; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14271; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 14272; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14273; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 14274; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 14275; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14276; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 14277; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14278; 14279; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: 14280; GFX940-TGSPLIT: ; %bb.0: ; %entry 14281; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14282; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 14283; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14284; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 14285; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 14286; 
GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 14287; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14288; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 14289; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14290; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 14291; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 14292; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 14293; GFX940-TGSPLIT-NEXT: s_endpgm 14294; 14295; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: 14296; GFX11-WGP: ; %bb.0: ; %entry 14297; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14298; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 14299; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14300; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 14301; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 14302; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 14303; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 14304; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14305; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc 14306; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 14307; GFX11-WGP-NEXT: buffer_gl1_inv 14308; GFX11-WGP-NEXT: buffer_gl0_inv 14309; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 14310; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 14311; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14312; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 14313; GFX11-WGP-NEXT: s_endpgm 14314; 14315; GFX11-CU-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: 14316; GFX11-CU: ; %bb.0: ; %entry 14317; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14318; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 14319; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14320; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 14321; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 14322; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 14323; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 14324; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 14325; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc 14326; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 14327; GFX11-CU-NEXT: buffer_gl1_inv 14328; GFX11-CU-NEXT: buffer_gl0_inv 14329; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 14330; GFX11-CU-NEXT: 
v_mov_b32_e32 v1, s1 14331; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14332; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 14333; GFX11-CU-NEXT: s_endpgm 14334; 14335; GFX12-WGP-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: 14336; GFX12-WGP: ; %bb.0: ; %entry 14337; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14338; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 14339; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14340; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 14341; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 14342; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s2 14343; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 14344; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 14345; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 14346; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 14347; GFX12-WGP-NEXT: s_wait_storecnt 0x0 14348; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 14349; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 14350; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 14351; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 14352; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 14353; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 14354; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 14355; GFX12-WGP-NEXT: s_wait_dscnt 0x0 14356; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 14357; GFX12-WGP-NEXT: s_endpgm 14358; 14359; GFX12-CU-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: 14360; GFX12-CU: ; %bb.0: ; %entry 14361; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14362; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 14363; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14364; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 14365; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 14366; GFX12-CU-NEXT: v_mov_b32_e32 v2, s2 14367; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 14368; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 14369; GFX12-CU-NEXT: s_wait_samplecnt 0x0 14370; GFX12-CU-NEXT: s_wait_loadcnt 0x0 14371; GFX12-CU-NEXT: s_wait_storecnt 0x0 14372; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 14373; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 14374; GFX12-CU-NEXT: s_wait_samplecnt 0x0 
14375; GFX12-CU-NEXT: s_wait_loadcnt 0x0 14376; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 14377; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 14378; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 14379; GFX12-CU-NEXT: s_wait_dscnt 0x0 14380; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 14381; GFX12-CU-NEXT: s_endpgm 14382 ptr %out, i32 %in) { 14383entry: 14384 %val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") seq_cst 14385 store i32 %val, ptr %out, align 4 14386 ret void 14387} 14388 14389define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg( 14390; GFX7-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: 14391; GFX7: ; %bb.0: ; %entry 14392; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14393; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14394; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14395; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14396; GFX7-NEXT: s_mov_b64 s[10:11], 16 14397; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14398; GFX7-NEXT: s_mov_b32 s4, s8 14399; GFX7-NEXT: s_mov_b32 s5, s9 14400; GFX7-NEXT: s_mov_b32 s9, s10 14401; GFX7-NEXT: s_mov_b32 s8, s11 14402; GFX7-NEXT: s_add_u32 s4, s4, s9 14403; GFX7-NEXT: s_addc_u32 s8, s5, s8 14404; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14405; GFX7-NEXT: s_mov_b32 s5, s8 14406; GFX7-NEXT: v_mov_b32_e32 v2, s7 14407; GFX7-NEXT: v_mov_b32_e32 v0, s6 14408; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14409; GFX7-NEXT: v_mov_b32_e32 v3, v0 14410; GFX7-NEXT: v_mov_b32_e32 v0, s4 14411; GFX7-NEXT: v_mov_b32_e32 v1, s5 14412; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14413; GFX7-NEXT: s_endpgm 14414; 14415; GFX10-WGP-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: 14416; GFX10-WGP: ; %bb.0: ; %entry 14417; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 14418; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14419; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 14420; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 14421; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 14422; 
GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14423; GFX10-WGP-NEXT: s_mov_b32 s4, s8 14424; GFX10-WGP-NEXT: s_mov_b32 s5, s9 14425; GFX10-WGP-NEXT: s_mov_b32 s9, s10 14426; GFX10-WGP-NEXT: s_mov_b32 s8, s11 14427; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 14428; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 14429; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14430; GFX10-WGP-NEXT: s_mov_b32 s5, s8 14431; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 14432; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 14433; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14434; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 14435; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 14436; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 14437; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14438; GFX10-WGP-NEXT: s_endpgm 14439; 14440; GFX10-CU-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: 14441; GFX10-CU: ; %bb.0: ; %entry 14442; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 14443; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14444; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 14445; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 14446; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 14447; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14448; GFX10-CU-NEXT: s_mov_b32 s4, s8 14449; GFX10-CU-NEXT: s_mov_b32 s5, s9 14450; GFX10-CU-NEXT: s_mov_b32 s9, s10 14451; GFX10-CU-NEXT: s_mov_b32 s8, s11 14452; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 14453; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 14454; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14455; GFX10-CU-NEXT: s_mov_b32 s5, s8 14456; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 14457; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 14458; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14459; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 14460; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 14461; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 14462; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14463; GFX10-CU-NEXT: s_endpgm 14464; 14465; SKIP-CACHE-INV-LABEL: 
flat_system_one_as_monotonic_monotonic_cmpxchg: 14466; SKIP-CACHE-INV: ; %bb.0: ; %entry 14467; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 14468; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 14469; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 14470; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 14471; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 14472; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14473; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 14474; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 14475; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 14476; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 14477; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 14478; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 14479; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 14480; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 14481; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 14482; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 14483; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14484; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 14485; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 14486; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 14487; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14488; SKIP-CACHE-INV-NEXT: s_endpgm 14489; 14490; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: 14491; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14492; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14493; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14494; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14495; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14496; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14497; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 14498; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14499; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 14500; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 14501; GFX90A-NOTTGSPLIT-NEXT: 
flat_atomic_cmpswap v[0:1], v[2:3] offset:16 14502; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14503; 14504; GFX90A-TGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: 14505; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14506; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14507; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14508; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14509; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14510; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14511; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 14512; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14513; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 14514; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 14515; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 14516; GFX90A-TGSPLIT-NEXT: s_endpgm 14517; 14518; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: 14519; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14520; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14521; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14522; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14523; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14524; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14525; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 14526; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14527; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 14528; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 14529; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 14530; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14531; 14532; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: 14533; GFX940-TGSPLIT: ; %bb.0: ; %entry 14534; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14535; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14536; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14537; GFX940-TGSPLIT-NEXT: s_waitcnt 
lgkmcnt(0) 14538; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14539; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 14540; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14541; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 14542; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 14543; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 14544; GFX940-TGSPLIT-NEXT: s_endpgm 14545; 14546; GFX11-WGP-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: 14547; GFX11-WGP: ; %bb.0: ; %entry 14548; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14549; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14550; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14551; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14552; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 14553; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 14554; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14555; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 14556; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 14557; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 14558; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 14559; GFX11-WGP-NEXT: s_endpgm 14560; 14561; GFX11-CU-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: 14562; GFX11-CU: ; %bb.0: ; %entry 14563; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14564; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14565; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14566; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14567; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 14568; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 14569; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14570; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 14571; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 14572; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 14573; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 14574; GFX11-CU-NEXT: s_endpgm 14575; 14576; GFX12-WGP-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: 14577; GFX12-WGP: ; %bb.0: ; %entry 14578; GFX12-WGP-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x0 14579; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14580; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14581; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14582; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 14583; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 14584; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14585; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 14586; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 14587; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 14588; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 14589; GFX12-WGP-NEXT: s_endpgm 14590; 14591; GFX12-CU-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: 14592; GFX12-CU: ; %bb.0: ; %entry 14593; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14594; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14595; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14596; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14597; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 14598; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 14599; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14600; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 14601; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 14602; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 14603; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 14604; GFX12-CU-NEXT: s_endpgm 14605 ptr %out, i32 %in, i32 %old) { 14606entry: 14607 %gep = getelementptr i32, ptr %out, i32 4 14608 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic 14609 ret void 14610} 14611 14612define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg( 14613; GFX7-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: 14614; GFX7: ; %bb.0: ; %entry 14615; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14616; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14617; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14618; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14619; GFX7-NEXT: s_mov_b64 s[10:11], 16 14620; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14621; 
GFX7-NEXT: s_mov_b32 s4, s8 14622; GFX7-NEXT: s_mov_b32 s5, s9 14623; GFX7-NEXT: s_mov_b32 s9, s10 14624; GFX7-NEXT: s_mov_b32 s8, s11 14625; GFX7-NEXT: s_add_u32 s4, s4, s9 14626; GFX7-NEXT: s_addc_u32 s8, s5, s8 14627; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14628; GFX7-NEXT: s_mov_b32 s5, s8 14629; GFX7-NEXT: v_mov_b32_e32 v2, s7 14630; GFX7-NEXT: v_mov_b32_e32 v0, s6 14631; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14632; GFX7-NEXT: v_mov_b32_e32 v3, v0 14633; GFX7-NEXT: v_mov_b32_e32 v0, s4 14634; GFX7-NEXT: v_mov_b32_e32 v1, s5 14635; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14636; GFX7-NEXT: s_waitcnt vmcnt(0) 14637; GFX7-NEXT: buffer_wbinvl1_vol 14638; GFX7-NEXT: s_endpgm 14639; 14640; GFX10-WGP-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: 14641; GFX10-WGP: ; %bb.0: ; %entry 14642; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 14643; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14644; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 14645; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 14646; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 14647; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14648; GFX10-WGP-NEXT: s_mov_b32 s4, s8 14649; GFX10-WGP-NEXT: s_mov_b32 s5, s9 14650; GFX10-WGP-NEXT: s_mov_b32 s9, s10 14651; GFX10-WGP-NEXT: s_mov_b32 s8, s11 14652; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 14653; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 14654; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14655; GFX10-WGP-NEXT: s_mov_b32 s5, s8 14656; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 14657; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 14658; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14659; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 14660; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 14661; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 14662; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14663; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14664; GFX10-WGP-NEXT: buffer_gl1_inv 14665; GFX10-WGP-NEXT: 
buffer_gl0_inv 14666; GFX10-WGP-NEXT: s_endpgm 14667; 14668; GFX10-CU-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: 14669; GFX10-CU: ; %bb.0: ; %entry 14670; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 14671; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14672; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 14673; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 14674; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 14675; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14676; GFX10-CU-NEXT: s_mov_b32 s4, s8 14677; GFX10-CU-NEXT: s_mov_b32 s5, s9 14678; GFX10-CU-NEXT: s_mov_b32 s9, s10 14679; GFX10-CU-NEXT: s_mov_b32 s8, s11 14680; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 14681; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 14682; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14683; GFX10-CU-NEXT: s_mov_b32 s5, s8 14684; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 14685; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 14686; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14687; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 14688; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 14689; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 14690; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14691; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14692; GFX10-CU-NEXT: buffer_gl1_inv 14693; GFX10-CU-NEXT: buffer_gl0_inv 14694; GFX10-CU-NEXT: s_endpgm 14695; 14696; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: 14697; SKIP-CACHE-INV: ; %bb.0: ; %entry 14698; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 14699; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 14700; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 14701; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 14702; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 14703; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14704; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 14705; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 14706; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 14707; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 14708; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 
14709; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 14710; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 14711; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 14712; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 14713; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 14714; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14715; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 14716; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 14717; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 14718; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14719; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14720; SKIP-CACHE-INV-NEXT: s_endpgm 14721; 14722; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: 14723; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14724; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14725; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14726; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14727; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14728; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14729; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 14730; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14731; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 14732; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 14733; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 14734; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14735; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 14736; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 14737; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14738; 14739; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: 14740; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14741; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14742; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14743; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14744; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14745; GFX90A-TGSPLIT-NEXT: 
v_mov_b32_e32 v2, s7 14746; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 14747; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14748; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 14749; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 14750; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 14751; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14752; GFX90A-TGSPLIT-NEXT: buffer_invl2 14753; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 14754; GFX90A-TGSPLIT-NEXT: s_endpgm 14755; 14756; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: 14757; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 14758; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14759; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14760; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14761; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14762; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14763; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 14764; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14765; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 14766; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 14767; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 14768; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14769; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 14770; GFX940-NOTTGSPLIT-NEXT: s_endpgm 14771; 14772; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: 14773; GFX940-TGSPLIT: ; %bb.0: ; %entry 14774; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 14775; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 14776; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 14777; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14778; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 14779; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 14780; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14781; GFX940-TGSPLIT-NEXT: 
v_mov_b32_e32 v3, v0 14782; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 14783; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 14784; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 14785; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 14786; GFX940-TGSPLIT-NEXT: s_endpgm 14787; 14788; GFX11-WGP-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: 14789; GFX11-WGP: ; %bb.0: ; %entry 14790; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14791; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14792; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14793; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 14794; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 14795; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 14796; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14797; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 14798; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 14799; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 14800; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 14801; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14802; GFX11-WGP-NEXT: buffer_gl1_inv 14803; GFX11-WGP-NEXT: buffer_gl0_inv 14804; GFX11-WGP-NEXT: s_endpgm 14805; 14806; GFX11-CU-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: 14807; GFX11-CU: ; %bb.0: ; %entry 14808; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14809; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14810; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14811; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 14812; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 14813; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 14814; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14815; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 14816; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 14817; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 14818; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 14819; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 14820; GFX11-CU-NEXT: buffer_gl1_inv 14821; GFX11-CU-NEXT: buffer_gl0_inv 14822; GFX11-CU-NEXT: s_endpgm 14823; 14824; GFX12-WGP-LABEL: 
flat_system_one_as_acquire_monotonic_cmpxchg: 14825; GFX12-WGP: ; %bb.0: ; %entry 14826; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14827; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 14828; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 14829; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 14830; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 14831; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 14832; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14833; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 14834; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 14835; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 14836; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 14837; GFX12-WGP-NEXT: s_wait_storecnt 0x0 14838; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 14839; GFX12-WGP-NEXT: s_endpgm 14840; 14841; GFX12-CU-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: 14842; GFX12-CU: ; %bb.0: ; %entry 14843; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 14844; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 14845; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 14846; GFX12-CU-NEXT: s_wait_kmcnt 0x0 14847; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 14848; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 14849; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14850; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 14851; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 14852; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 14853; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 14854; GFX12-CU-NEXT: s_wait_storecnt 0x0 14855; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 14856; GFX12-CU-NEXT: s_endpgm 14857 ptr %out, i32 %in, i32 %old) { 14858entry: 14859 %gep = getelementptr i32, ptr %out, i32 4 14860 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic 14861 ret void 14862} 14863 14864define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg( 14865; GFX7-LABEL: flat_system_one_as_release_monotonic_cmpxchg: 14866; GFX7: ; %bb.0: ; %entry 
14867; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 14868; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14869; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 14870; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 14871; GFX7-NEXT: s_mov_b64 s[10:11], 16 14872; GFX7-NEXT: s_waitcnt lgkmcnt(0) 14873; GFX7-NEXT: s_mov_b32 s4, s8 14874; GFX7-NEXT: s_mov_b32 s5, s9 14875; GFX7-NEXT: s_mov_b32 s9, s10 14876; GFX7-NEXT: s_mov_b32 s8, s11 14877; GFX7-NEXT: s_add_u32 s4, s4, s9 14878; GFX7-NEXT: s_addc_u32 s8, s5, s8 14879; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14880; GFX7-NEXT: s_mov_b32 s5, s8 14881; GFX7-NEXT: v_mov_b32_e32 v2, s7 14882; GFX7-NEXT: v_mov_b32_e32 v0, s6 14883; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14884; GFX7-NEXT: v_mov_b32_e32 v3, v0 14885; GFX7-NEXT: v_mov_b32_e32 v0, s4 14886; GFX7-NEXT: v_mov_b32_e32 v1, s5 14887; GFX7-NEXT: s_waitcnt vmcnt(0) 14888; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14889; GFX7-NEXT: s_endpgm 14890; 14891; GFX10-WGP-LABEL: flat_system_one_as_release_monotonic_cmpxchg: 14892; GFX10-WGP: ; %bb.0: ; %entry 14893; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 14894; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14895; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 14896; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 14897; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 14898; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 14899; GFX10-WGP-NEXT: s_mov_b32 s4, s8 14900; GFX10-WGP-NEXT: s_mov_b32 s5, s9 14901; GFX10-WGP-NEXT: s_mov_b32 s9, s10 14902; GFX10-WGP-NEXT: s_mov_b32 s8, s11 14903; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 14904; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 14905; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14906; GFX10-WGP-NEXT: s_mov_b32 s5, s8 14907; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 14908; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 14909; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14910; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 14911; 
GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 14912; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 14913; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 14914; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 14915; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14916; GFX10-WGP-NEXT: s_endpgm 14917; 14918; GFX10-CU-LABEL: flat_system_one_as_release_monotonic_cmpxchg: 14919; GFX10-CU: ; %bb.0: ; %entry 14920; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 14921; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 14922; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 14923; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 14924; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 14925; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 14926; GFX10-CU-NEXT: s_mov_b32 s4, s8 14927; GFX10-CU-NEXT: s_mov_b32 s5, s9 14928; GFX10-CU-NEXT: s_mov_b32 s9, s10 14929; GFX10-CU-NEXT: s_mov_b32 s8, s11 14930; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 14931; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 14932; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 14933; GFX10-CU-NEXT: s_mov_b32 s5, s8 14934; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 14935; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 14936; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14937; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 14938; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 14939; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 14940; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 14941; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 14942; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14943; GFX10-CU-NEXT: s_endpgm 14944; 14945; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_monotonic_cmpxchg: 14946; SKIP-CACHE-INV: ; %bb.0: ; %entry 14947; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 14948; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 14949; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 14950; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 14951; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 14952; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 14953; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 
14954; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 14955; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 14956; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 14957; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 14958; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 14959; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 14960; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 14961; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 14962; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 14963; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14964; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 14965; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 14966; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 14967; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 14968; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 14969; SKIP-CACHE-INV-NEXT: s_endpgm 14970; 14971; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_release_monotonic_cmpxchg: 14972; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 14973; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14974; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14975; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14976; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14977; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14978; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 14979; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14980; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 14981; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 14982; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 14983; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 14984; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 14985; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 14986; 14987; GFX90A-TGSPLIT-LABEL: flat_system_one_as_release_monotonic_cmpxchg: 14988; GFX90A-TGSPLIT: ; %bb.0: ; %entry 14989; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 14990; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 14991; 
GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 14992; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 14993; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 14994; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 14995; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 14996; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 14997; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 14998; GFX90A-TGSPLIT-NEXT: buffer_wbl2 14999; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15000; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 15001; GFX90A-TGSPLIT-NEXT: s_endpgm 15002; 15003; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_monotonic_cmpxchg: 15004; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15005; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15006; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15007; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15008; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15009; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15010; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 15011; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15012; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15013; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 15014; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 15015; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15016; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 15017; GFX940-NOTTGSPLIT-NEXT: s_endpgm 15018; 15019; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_monotonic_cmpxchg: 15020; GFX940-TGSPLIT: ; %bb.0: ; %entry 15021; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15022; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15023; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15024; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15025; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15026; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 15027; GFX940-TGSPLIT-NEXT: ; kill: def 
$vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15028; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15029; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 15030; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 15031; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15032; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 15033; GFX940-TGSPLIT-NEXT: s_endpgm 15034; 15035; GFX11-WGP-LABEL: flat_system_one_as_release_monotonic_cmpxchg: 15036; GFX11-WGP: ; %bb.0: ; %entry 15037; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15038; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15039; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15040; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15041; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 15042; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 15043; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15044; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 15045; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 15046; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 15047; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 15048; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15049; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 15050; GFX11-WGP-NEXT: s_endpgm 15051; 15052; GFX11-CU-LABEL: flat_system_one_as_release_monotonic_cmpxchg: 15053; GFX11-CU: ; %bb.0: ; %entry 15054; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15055; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15056; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15057; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15058; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 15059; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 15060; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15061; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 15062; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 15063; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 15064; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 15065; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15066; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 15067; GFX11-CU-NEXT: s_endpgm 15068; 15069; 
GFX12-WGP-LABEL: flat_system_one_as_release_monotonic_cmpxchg: 15070; GFX12-WGP: ; %bb.0: ; %entry 15071; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15072; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15073; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15074; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15075; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 15076; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 15077; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15078; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 15079; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 15080; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 15081; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 15082; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 15083; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 15084; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 15085; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15086; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 15087; GFX12-WGP-NEXT: s_endpgm 15088; 15089; GFX12-CU-LABEL: flat_system_one_as_release_monotonic_cmpxchg: 15090; GFX12-CU: ; %bb.0: ; %entry 15091; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15092; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15093; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15094; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15095; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 15096; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 15097; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15098; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 15099; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 15100; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 15101; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 15102; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 15103; GFX12-CU-NEXT: s_wait_samplecnt 0x0 15104; GFX12-CU-NEXT: s_wait_loadcnt 0x0 15105; GFX12-CU-NEXT: s_wait_storecnt 0x0 15106; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 15107; GFX12-CU-NEXT: s_endpgm 15108 ptr %out, i32 %in, i32 %old) { 15109entry: 15110 %gep = getelementptr i32, ptr %out, i32 4 15111 %val = cmpxchg volatile 
ptr %gep, i32 %old, i32 %in syncscope("one-as") release monotonic 15112 ret void 15113} 15114 15115define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg( 15116; GFX7-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: 15117; GFX7: ; %bb.0: ; %entry 15118; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15119; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15120; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15121; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15122; GFX7-NEXT: s_mov_b64 s[10:11], 16 15123; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15124; GFX7-NEXT: s_mov_b32 s4, s8 15125; GFX7-NEXT: s_mov_b32 s5, s9 15126; GFX7-NEXT: s_mov_b32 s9, s10 15127; GFX7-NEXT: s_mov_b32 s8, s11 15128; GFX7-NEXT: s_add_u32 s4, s4, s9 15129; GFX7-NEXT: s_addc_u32 s8, s5, s8 15130; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15131; GFX7-NEXT: s_mov_b32 s5, s8 15132; GFX7-NEXT: v_mov_b32_e32 v2, s7 15133; GFX7-NEXT: v_mov_b32_e32 v0, s6 15134; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15135; GFX7-NEXT: v_mov_b32_e32 v3, v0 15136; GFX7-NEXT: v_mov_b32_e32 v0, s4 15137; GFX7-NEXT: v_mov_b32_e32 v1, s5 15138; GFX7-NEXT: s_waitcnt vmcnt(0) 15139; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15140; GFX7-NEXT: s_waitcnt vmcnt(0) 15141; GFX7-NEXT: buffer_wbinvl1_vol 15142; GFX7-NEXT: s_endpgm 15143; 15144; GFX10-WGP-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: 15145; GFX10-WGP: ; %bb.0: ; %entry 15146; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 15147; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15148; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 15149; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 15150; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 15151; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15152; GFX10-WGP-NEXT: s_mov_b32 s4, s8 15153; GFX10-WGP-NEXT: s_mov_b32 s5, s9 15154; GFX10-WGP-NEXT: s_mov_b32 s9, s10 15155; GFX10-WGP-NEXT: s_mov_b32 s8, s11 15156; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 15157; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 15158; 
GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15159; GFX10-WGP-NEXT: s_mov_b32 s5, s8 15160; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 15161; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 15162; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15163; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 15164; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 15165; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 15166; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 15167; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15168; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15169; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15170; GFX10-WGP-NEXT: buffer_gl1_inv 15171; GFX10-WGP-NEXT: buffer_gl0_inv 15172; GFX10-WGP-NEXT: s_endpgm 15173; 15174; GFX10-CU-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: 15175; GFX10-CU: ; %bb.0: ; %entry 15176; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 15177; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15178; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 15179; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 15180; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 15181; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15182; GFX10-CU-NEXT: s_mov_b32 s4, s8 15183; GFX10-CU-NEXT: s_mov_b32 s5, s9 15184; GFX10-CU-NEXT: s_mov_b32 s9, s10 15185; GFX10-CU-NEXT: s_mov_b32 s8, s11 15186; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 15187; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 15188; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15189; GFX10-CU-NEXT: s_mov_b32 s5, s8 15190; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 15191; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 15192; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15193; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 15194; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 15195; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 15196; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 15197; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15198; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15199; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15200; GFX10-CU-NEXT: 
buffer_gl1_inv 15201; GFX10-CU-NEXT: buffer_gl0_inv 15202; GFX10-CU-NEXT: s_endpgm 15203; 15204; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: 15205; SKIP-CACHE-INV: ; %bb.0: ; %entry 15206; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 15207; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 15208; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 15209; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 15210; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 15211; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15212; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 15213; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 15214; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 15215; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 15216; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 15217; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 15218; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 15219; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 15220; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 15221; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 15222; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15223; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 15224; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 15225; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 15226; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15227; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15228; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15229; SKIP-CACHE-INV-NEXT: s_endpgm 15230; 15231; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: 15232; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15233; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15234; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15235; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15236; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15237; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15238; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 15239; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed 
$vgpr2 def $vgpr2_vgpr3 killed $exec 15240; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15241; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 15242; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 15243; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15244; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 15245; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15246; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 15247; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 15248; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 15249; 15250; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: 15251; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15252; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15253; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15254; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15255; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15256; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15257; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 15258; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15259; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15260; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 15261; GFX90A-TGSPLIT-NEXT: buffer_wbl2 15262; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15263; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 15264; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15265; GFX90A-TGSPLIT-NEXT: buffer_invl2 15266; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 15267; GFX90A-TGSPLIT-NEXT: s_endpgm 15268; 15269; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: 15270; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15271; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15272; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15273; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15274; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15275; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15276; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 15277; 
GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15278; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15279; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 15280; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 15281; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15282; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 15283; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15284; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 15285; GFX940-NOTTGSPLIT-NEXT: s_endpgm 15286; 15287; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: 15288; GFX940-TGSPLIT: ; %bb.0: ; %entry 15289; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15290; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15291; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15292; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15293; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15294; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 15295; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15296; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15297; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 15298; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 15299; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15300; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 15301; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15302; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 15303; GFX940-TGSPLIT-NEXT: s_endpgm 15304; 15305; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: 15306; GFX11-WGP: ; %bb.0: ; %entry 15307; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15308; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15309; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15310; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15311; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 15312; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 15313; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15314; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 
15315; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 15316; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 15317; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 15318; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15319; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 15320; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15321; GFX11-WGP-NEXT: buffer_gl1_inv 15322; GFX11-WGP-NEXT: buffer_gl0_inv 15323; GFX11-WGP-NEXT: s_endpgm 15324; 15325; GFX11-CU-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: 15326; GFX11-CU: ; %bb.0: ; %entry 15327; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15328; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15329; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15330; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15331; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 15332; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 15333; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15334; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 15335; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 15336; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 15337; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 15338; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15339; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 15340; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15341; GFX11-CU-NEXT: buffer_gl1_inv 15342; GFX11-CU-NEXT: buffer_gl0_inv 15343; GFX11-CU-NEXT: s_endpgm 15344; 15345; GFX12-WGP-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: 15346; GFX12-WGP: ; %bb.0: ; %entry 15347; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15348; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15349; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15350; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15351; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 15352; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 15353; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15354; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 15355; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 15356; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 15357; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 15358; 
GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 15359; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 15360; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 15361; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15362; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 15363; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15364; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 15365; GFX12-WGP-NEXT: s_endpgm 15366; 15367; GFX12-CU-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: 15368; GFX12-CU: ; %bb.0: ; %entry 15369; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15370; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15371; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15372; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15373; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 15374; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 15375; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15376; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 15377; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 15378; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 15379; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 15380; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 15381; GFX12-CU-NEXT: s_wait_samplecnt 0x0 15382; GFX12-CU-NEXT: s_wait_loadcnt 0x0 15383; GFX12-CU-NEXT: s_wait_storecnt 0x0 15384; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 15385; GFX12-CU-NEXT: s_wait_storecnt 0x0 15386; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 15387; GFX12-CU-NEXT: s_endpgm 15388 ptr %out, i32 %in, i32 %old) { 15389entry: 15390 %gep = getelementptr i32, ptr %out, i32 4 15391 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic 15392 ret void 15393} 15394 15395define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg( 15396; GFX7-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: 15397; GFX7: ; %bb.0: ; %entry 15398; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15399; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15400; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15401; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15402; 
GFX7-NEXT: s_mov_b64 s[10:11], 16 15403; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15404; GFX7-NEXT: s_mov_b32 s4, s8 15405; GFX7-NEXT: s_mov_b32 s5, s9 15406; GFX7-NEXT: s_mov_b32 s9, s10 15407; GFX7-NEXT: s_mov_b32 s8, s11 15408; GFX7-NEXT: s_add_u32 s4, s4, s9 15409; GFX7-NEXT: s_addc_u32 s8, s5, s8 15410; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15411; GFX7-NEXT: s_mov_b32 s5, s8 15412; GFX7-NEXT: v_mov_b32_e32 v2, s7 15413; GFX7-NEXT: v_mov_b32_e32 v0, s6 15414; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15415; GFX7-NEXT: v_mov_b32_e32 v3, v0 15416; GFX7-NEXT: v_mov_b32_e32 v0, s4 15417; GFX7-NEXT: v_mov_b32_e32 v1, s5 15418; GFX7-NEXT: s_waitcnt vmcnt(0) 15419; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15420; GFX7-NEXT: s_waitcnt vmcnt(0) 15421; GFX7-NEXT: buffer_wbinvl1_vol 15422; GFX7-NEXT: s_endpgm 15423; 15424; GFX10-WGP-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: 15425; GFX10-WGP: ; %bb.0: ; %entry 15426; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 15427; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15428; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 15429; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 15430; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 15431; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15432; GFX10-WGP-NEXT: s_mov_b32 s4, s8 15433; GFX10-WGP-NEXT: s_mov_b32 s5, s9 15434; GFX10-WGP-NEXT: s_mov_b32 s9, s10 15435; GFX10-WGP-NEXT: s_mov_b32 s8, s11 15436; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 15437; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 15438; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15439; GFX10-WGP-NEXT: s_mov_b32 s5, s8 15440; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 15441; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 15442; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15443; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 15444; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 15445; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 15446; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 15447; 
GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15448; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15449; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15450; GFX10-WGP-NEXT: buffer_gl1_inv 15451; GFX10-WGP-NEXT: buffer_gl0_inv 15452; GFX10-WGP-NEXT: s_endpgm 15453; 15454; GFX10-CU-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: 15455; GFX10-CU: ; %bb.0: ; %entry 15456; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 15457; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15458; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 15459; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 15460; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 15461; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15462; GFX10-CU-NEXT: s_mov_b32 s4, s8 15463; GFX10-CU-NEXT: s_mov_b32 s5, s9 15464; GFX10-CU-NEXT: s_mov_b32 s9, s10 15465; GFX10-CU-NEXT: s_mov_b32 s8, s11 15466; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 15467; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 15468; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15469; GFX10-CU-NEXT: s_mov_b32 s5, s8 15470; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 15471; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 15472; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15473; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 15474; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 15475; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 15476; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 15477; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15478; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15479; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15480; GFX10-CU-NEXT: buffer_gl1_inv 15481; GFX10-CU-NEXT: buffer_gl0_inv 15482; GFX10-CU-NEXT: s_endpgm 15483; 15484; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: 15485; SKIP-CACHE-INV: ; %bb.0: ; %entry 15486; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 15487; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 15488; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 15489; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 15490; SKIP-CACHE-INV-NEXT: 
s_mov_b64 s[6:7], 16 15491; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15492; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 15493; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 15494; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 15495; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 15496; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 15497; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 15498; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 15499; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 15500; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 15501; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 15502; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15503; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 15504; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 15505; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 15506; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15507; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15508; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15509; SKIP-CACHE-INV-NEXT: s_endpgm 15510; 15511; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: 15512; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15513; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15514; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15515; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15516; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15517; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15518; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 15519; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15520; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15521; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 15522; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 15523; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15524; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 15525; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15526; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 15527; GFX90A-NOTTGSPLIT-NEXT: 
buffer_wbinvl1_vol 15528; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 15529; 15530; GFX90A-TGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: 15531; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15532; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15533; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15534; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15535; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15536; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15537; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 15538; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15539; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15540; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 15541; GFX90A-TGSPLIT-NEXT: buffer_wbl2 15542; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15543; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 15544; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15545; GFX90A-TGSPLIT-NEXT: buffer_invl2 15546; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 15547; GFX90A-TGSPLIT-NEXT: s_endpgm 15548; 15549; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: 15550; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15551; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15552; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15553; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15554; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15555; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15556; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 15557; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15558; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15559; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 15560; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 15561; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15562; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 15563; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15564; GFX940-NOTTGSPLIT-NEXT: 
buffer_inv sc0 sc1 15565; GFX940-NOTTGSPLIT-NEXT: s_endpgm 15566; 15567; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: 15568; GFX940-TGSPLIT: ; %bb.0: ; %entry 15569; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15570; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15571; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15572; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15573; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15574; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 15575; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15576; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15577; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 15578; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 15579; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15580; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 15581; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15582; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 15583; GFX940-TGSPLIT-NEXT: s_endpgm 15584; 15585; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: 15586; GFX11-WGP: ; %bb.0: ; %entry 15587; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15588; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15589; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15590; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15591; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 15592; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 15593; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15594; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 15595; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 15596; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 15597; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 15598; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15599; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 15600; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15601; GFX11-WGP-NEXT: buffer_gl1_inv 15602; GFX11-WGP-NEXT: buffer_gl0_inv 15603; GFX11-WGP-NEXT: s_endpgm 15604; 15605; GFX11-CU-LABEL: 
flat_system_one_as_seq_cst_monotonic_cmpxchg: 15606; GFX11-CU: ; %bb.0: ; %entry 15607; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15608; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15609; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15610; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15611; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 15612; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 15613; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15614; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 15615; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 15616; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 15617; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 15618; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15619; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 15620; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15621; GFX11-CU-NEXT: buffer_gl1_inv 15622; GFX11-CU-NEXT: buffer_gl0_inv 15623; GFX11-CU-NEXT: s_endpgm 15624; 15625; GFX12-WGP-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: 15626; GFX12-WGP: ; %bb.0: ; %entry 15627; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15628; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15629; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15630; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15631; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 15632; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 15633; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15634; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 15635; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 15636; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 15637; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 15638; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 15639; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 15640; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 15641; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15642; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 15643; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15644; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 15645; GFX12-WGP-NEXT: s_endpgm 15646; 15647; GFX12-CU-LABEL: 
flat_system_one_as_seq_cst_monotonic_cmpxchg: 15648; GFX12-CU: ; %bb.0: ; %entry 15649; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15650; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15651; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15652; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15653; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 15654; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 15655; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15656; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 15657; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 15658; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 15659; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 15660; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 15661; GFX12-CU-NEXT: s_wait_samplecnt 0x0 15662; GFX12-CU-NEXT: s_wait_loadcnt 0x0 15663; GFX12-CU-NEXT: s_wait_storecnt 0x0 15664; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 15665; GFX12-CU-NEXT: s_wait_storecnt 0x0 15666; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 15667; GFX12-CU-NEXT: s_endpgm 15668 ptr %out, i32 %in, i32 %old) { 15669entry: 15670 %gep = getelementptr i32, ptr %out, i32 4 15671 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic 15672 ret void 15673} 15674 15675define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg( 15676; GFX7-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: 15677; GFX7: ; %bb.0: ; %entry 15678; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15679; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15680; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15681; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15682; GFX7-NEXT: s_mov_b64 s[10:11], 16 15683; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15684; GFX7-NEXT: s_mov_b32 s4, s8 15685; GFX7-NEXT: s_mov_b32 s5, s9 15686; GFX7-NEXT: s_mov_b32 s9, s10 15687; GFX7-NEXT: s_mov_b32 s8, s11 15688; GFX7-NEXT: s_add_u32 s4, s4, s9 15689; GFX7-NEXT: s_addc_u32 s8, s5, s8 15690; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15691; GFX7-NEXT: s_mov_b32 s5, s8 15692; 
GFX7-NEXT: v_mov_b32_e32 v2, s7 15693; GFX7-NEXT: v_mov_b32_e32 v0, s6 15694; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15695; GFX7-NEXT: v_mov_b32_e32 v3, v0 15696; GFX7-NEXT: v_mov_b32_e32 v0, s4 15697; GFX7-NEXT: v_mov_b32_e32 v1, s5 15698; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15699; GFX7-NEXT: s_waitcnt vmcnt(0) 15700; GFX7-NEXT: buffer_wbinvl1_vol 15701; GFX7-NEXT: s_endpgm 15702; 15703; GFX10-WGP-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: 15704; GFX10-WGP: ; %bb.0: ; %entry 15705; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 15706; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15707; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 15708; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 15709; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 15710; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15711; GFX10-WGP-NEXT: s_mov_b32 s4, s8 15712; GFX10-WGP-NEXT: s_mov_b32 s5, s9 15713; GFX10-WGP-NEXT: s_mov_b32 s9, s10 15714; GFX10-WGP-NEXT: s_mov_b32 s8, s11 15715; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 15716; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 15717; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15718; GFX10-WGP-NEXT: s_mov_b32 s5, s8 15719; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 15720; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 15721; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15722; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 15723; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 15724; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 15725; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15726; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15727; GFX10-WGP-NEXT: buffer_gl1_inv 15728; GFX10-WGP-NEXT: buffer_gl0_inv 15729; GFX10-WGP-NEXT: s_endpgm 15730; 15731; GFX10-CU-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: 15732; GFX10-CU: ; %bb.0: ; %entry 15733; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 15734; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15735; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 15736; 
GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 15737; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 15738; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15739; GFX10-CU-NEXT: s_mov_b32 s4, s8 15740; GFX10-CU-NEXT: s_mov_b32 s5, s9 15741; GFX10-CU-NEXT: s_mov_b32 s9, s10 15742; GFX10-CU-NEXT: s_mov_b32 s8, s11 15743; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 15744; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 15745; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15746; GFX10-CU-NEXT: s_mov_b32 s5, s8 15747; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 15748; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 15749; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15750; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 15751; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 15752; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 15753; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15754; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 15755; GFX10-CU-NEXT: buffer_gl1_inv 15756; GFX10-CU-NEXT: buffer_gl0_inv 15757; GFX10-CU-NEXT: s_endpgm 15758; 15759; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: 15760; SKIP-CACHE-INV: ; %bb.0: ; %entry 15761; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 15762; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 15763; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 15764; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 15765; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 15766; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 15767; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 15768; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 15769; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 15770; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 15771; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 15772; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 15773; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 15774; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 15775; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 15776; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 15777; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 
killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15778; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 15779; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 15780; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 15781; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15782; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 15783; SKIP-CACHE-INV-NEXT: s_endpgm 15784; 15785; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: 15786; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 15787; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15788; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15789; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15790; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15791; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15792; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 15793; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15794; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15795; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 15796; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 15797; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15798; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 15799; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 15800; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 15801; 15802; GFX90A-TGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: 15803; GFX90A-TGSPLIT: ; %bb.0: ; %entry 15804; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 15805; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 15806; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 15807; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15808; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 15809; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 15810; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15811; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15812; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 15813; GFX90A-TGSPLIT-NEXT: 
flat_atomic_cmpswap v[0:1], v[2:3] offset:16 15814; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15815; GFX90A-TGSPLIT-NEXT: buffer_invl2 15816; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 15817; GFX90A-TGSPLIT-NEXT: s_endpgm 15818; 15819; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: 15820; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 15821; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15822; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15823; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15824; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15825; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15826; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 15827; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15828; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15829; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 15830; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 15831; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 15832; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 15833; GFX940-NOTTGSPLIT-NEXT: s_endpgm 15834; 15835; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: 15836; GFX940-TGSPLIT: ; %bb.0: ; %entry 15837; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 15838; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 15839; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 15840; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 15841; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 15842; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 15843; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15844; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 15845; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 15846; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 15847; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 15848; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 15849; GFX940-TGSPLIT-NEXT: s_endpgm 15850; 15851; GFX11-WGP-LABEL: 
flat_system_one_as_monotonic_acquire_cmpxchg: 15852; GFX11-WGP: ; %bb.0: ; %entry 15853; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15854; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15855; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15856; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 15857; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 15858; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 15859; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15860; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 15861; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 15862; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 15863; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 15864; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15865; GFX11-WGP-NEXT: buffer_gl1_inv 15866; GFX11-WGP-NEXT: buffer_gl0_inv 15867; GFX11-WGP-NEXT: s_endpgm 15868; 15869; GFX11-CU-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: 15870; GFX11-CU: ; %bb.0: ; %entry 15871; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15872; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15873; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15874; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 15875; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 15876; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 15877; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15878; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 15879; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 15880; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 15881; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 15882; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 15883; GFX11-CU-NEXT: buffer_gl1_inv 15884; GFX11-CU-NEXT: buffer_gl0_inv 15885; GFX11-CU-NEXT: s_endpgm 15886; 15887; GFX12-WGP-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: 15888; GFX12-WGP: ; %bb.0: ; %entry 15889; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15890; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 15891; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 15892; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 15893; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 
15894; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 15895; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15896; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 15897; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 15898; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 15899; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 15900; GFX12-WGP-NEXT: s_wait_storecnt 0x0 15901; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 15902; GFX12-WGP-NEXT: s_endpgm 15903; 15904; GFX12-CU-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: 15905; GFX12-CU: ; %bb.0: ; %entry 15906; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 15907; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 15908; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 15909; GFX12-CU-NEXT: s_wait_kmcnt 0x0 15910; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 15911; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 15912; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15913; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 15914; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 15915; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 15916; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 15917; GFX12-CU-NEXT: s_wait_storecnt 0x0 15918; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 15919; GFX12-CU-NEXT: s_endpgm 15920 ptr %out, i32 %in, i32 %old) { 15921entry: 15922 %gep = getelementptr i32, ptr %out, i32 4 15923 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire 15924 ret void 15925} 15926 15927define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg( 15928; GFX7-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: 15929; GFX7: ; %bb.0: ; %entry 15930; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 15931; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15932; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 15933; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 15934; GFX7-NEXT: s_mov_b64 s[10:11], 16 15935; GFX7-NEXT: s_waitcnt lgkmcnt(0) 15936; GFX7-NEXT: s_mov_b32 s4, s8 15937; GFX7-NEXT: 
s_mov_b32 s5, s9 15938; GFX7-NEXT: s_mov_b32 s9, s10 15939; GFX7-NEXT: s_mov_b32 s8, s11 15940; GFX7-NEXT: s_add_u32 s4, s4, s9 15941; GFX7-NEXT: s_addc_u32 s8, s5, s8 15942; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15943; GFX7-NEXT: s_mov_b32 s5, s8 15944; GFX7-NEXT: v_mov_b32_e32 v2, s7 15945; GFX7-NEXT: v_mov_b32_e32 v0, s6 15946; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15947; GFX7-NEXT: v_mov_b32_e32 v3, v0 15948; GFX7-NEXT: v_mov_b32_e32 v0, s4 15949; GFX7-NEXT: v_mov_b32_e32 v1, s5 15950; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15951; GFX7-NEXT: s_waitcnt vmcnt(0) 15952; GFX7-NEXT: buffer_wbinvl1_vol 15953; GFX7-NEXT: s_endpgm 15954; 15955; GFX10-WGP-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: 15956; GFX10-WGP: ; %bb.0: ; %entry 15957; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 15958; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15959; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 15960; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 15961; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 15962; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 15963; GFX10-WGP-NEXT: s_mov_b32 s4, s8 15964; GFX10-WGP-NEXT: s_mov_b32 s5, s9 15965; GFX10-WGP-NEXT: s_mov_b32 s9, s10 15966; GFX10-WGP-NEXT: s_mov_b32 s8, s11 15967; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 15968; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 15969; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15970; GFX10-WGP-NEXT: s_mov_b32 s5, s8 15971; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 15972; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 15973; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 15974; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 15975; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 15976; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 15977; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 15978; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 15979; GFX10-WGP-NEXT: buffer_gl1_inv 15980; GFX10-WGP-NEXT: buffer_gl0_inv 15981; GFX10-WGP-NEXT: s_endpgm 
15982; 15983; GFX10-CU-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: 15984; GFX10-CU: ; %bb.0: ; %entry 15985; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 15986; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 15987; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 15988; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 15989; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 15990; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 15991; GFX10-CU-NEXT: s_mov_b32 s4, s8 15992; GFX10-CU-NEXT: s_mov_b32 s5, s9 15993; GFX10-CU-NEXT: s_mov_b32 s9, s10 15994; GFX10-CU-NEXT: s_mov_b32 s8, s11 15995; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 15996; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 15997; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 15998; GFX10-CU-NEXT: s_mov_b32 s5, s8 15999; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 16000; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 16001; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16002; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 16003; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 16004; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 16005; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16006; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16007; GFX10-CU-NEXT: buffer_gl1_inv 16008; GFX10-CU-NEXT: buffer_gl0_inv 16009; GFX10-CU-NEXT: s_endpgm 16010; 16011; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: 16012; SKIP-CACHE-INV: ; %bb.0: ; %entry 16013; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 16014; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 16015; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 16016; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 16017; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 16018; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 16019; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 16020; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 16021; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 16022; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 16023; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 16024; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 
16025; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 16026; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 16027; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 16028; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 16029; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16030; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 16031; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 16032; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 16033; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16034; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16035; SKIP-CACHE-INV-NEXT: s_endpgm 16036; 16037; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: 16038; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16039; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16040; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16041; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16042; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16043; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16044; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 16045; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16046; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16047; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 16048; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 16049; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16050; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 16051; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 16052; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16053; 16054; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: 16055; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16056; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16057; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16058; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16059; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16060; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16061; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 
v0, s6 16062; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16063; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16064; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 16065; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 16066; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16067; GFX90A-TGSPLIT-NEXT: buffer_invl2 16068; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 16069; GFX90A-TGSPLIT-NEXT: s_endpgm 16070; 16071; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: 16072; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16073; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16074; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16075; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16076; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16077; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16078; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 16079; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16080; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16081; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 16082; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 16083; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16084; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 16085; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16086; 16087; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: 16088; GFX940-TGSPLIT: ; %bb.0: ; %entry 16089; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16090; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16091; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16092; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16093; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16094; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 16095; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16096; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16097; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], 
s[0:1] 16098; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 16099; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16100; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 16101; GFX940-TGSPLIT-NEXT: s_endpgm 16102; 16103; GFX11-WGP-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: 16104; GFX11-WGP: ; %bb.0: ; %entry 16105; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16106; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16107; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16108; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16109; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 16110; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 16111; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16112; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 16113; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 16114; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 16115; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 16116; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16117; GFX11-WGP-NEXT: buffer_gl1_inv 16118; GFX11-WGP-NEXT: buffer_gl0_inv 16119; GFX11-WGP-NEXT: s_endpgm 16120; 16121; GFX11-CU-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: 16122; GFX11-CU: ; %bb.0: ; %entry 16123; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16124; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16125; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16126; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16127; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 16128; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 16129; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16130; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 16131; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 16132; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 16133; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 16134; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16135; GFX11-CU-NEXT: buffer_gl1_inv 16136; GFX11-CU-NEXT: buffer_gl0_inv 16137; GFX11-CU-NEXT: s_endpgm 16138; 16139; GFX12-WGP-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: 16140; GFX12-WGP: ; %bb.0: ; %entry 
16141; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16142; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16143; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16144; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16145; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 16146; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 16147; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16148; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 16149; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 16150; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 16151; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 16152; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16153; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 16154; GFX12-WGP-NEXT: s_endpgm 16155; 16156; GFX12-CU-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: 16157; GFX12-CU: ; %bb.0: ; %entry 16158; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16159; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16160; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16161; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16162; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 16163; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 16164; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16165; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 16166; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 16167; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 16168; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 16169; GFX12-CU-NEXT: s_wait_storecnt 0x0 16170; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 16171; GFX12-CU-NEXT: s_endpgm 16172 ptr %out, i32 %in, i32 %old) { 16173entry: 16174 %gep = getelementptr i32, ptr %out, i32 4 16175 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire 16176 ret void 16177} 16178 16179define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg( 16180; GFX7-LABEL: flat_system_one_as_release_acquire_cmpxchg: 16181; GFX7: ; %bb.0: ; %entry 16182; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 16183; GFX7-NEXT: s_load_dwordx2 s[8:9], 
s[4:5], 0x0 16184; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 16185; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 16186; GFX7-NEXT: s_mov_b64 s[10:11], 16 16187; GFX7-NEXT: s_waitcnt lgkmcnt(0) 16188; GFX7-NEXT: s_mov_b32 s4, s8 16189; GFX7-NEXT: s_mov_b32 s5, s9 16190; GFX7-NEXT: s_mov_b32 s9, s10 16191; GFX7-NEXT: s_mov_b32 s8, s11 16192; GFX7-NEXT: s_add_u32 s4, s4, s9 16193; GFX7-NEXT: s_addc_u32 s8, s5, s8 16194; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 16195; GFX7-NEXT: s_mov_b32 s5, s8 16196; GFX7-NEXT: v_mov_b32_e32 v2, s7 16197; GFX7-NEXT: v_mov_b32_e32 v0, s6 16198; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16199; GFX7-NEXT: v_mov_b32_e32 v3, v0 16200; GFX7-NEXT: v_mov_b32_e32 v0, s4 16201; GFX7-NEXT: v_mov_b32_e32 v1, s5 16202; GFX7-NEXT: s_waitcnt vmcnt(0) 16203; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16204; GFX7-NEXT: s_waitcnt vmcnt(0) 16205; GFX7-NEXT: buffer_wbinvl1_vol 16206; GFX7-NEXT: s_endpgm 16207; 16208; GFX10-WGP-LABEL: flat_system_one_as_release_acquire_cmpxchg: 16209; GFX10-WGP: ; %bb.0: ; %entry 16210; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 16211; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 16212; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 16213; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 16214; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 16215; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 16216; GFX10-WGP-NEXT: s_mov_b32 s4, s8 16217; GFX10-WGP-NEXT: s_mov_b32 s5, s9 16218; GFX10-WGP-NEXT: s_mov_b32 s9, s10 16219; GFX10-WGP-NEXT: s_mov_b32 s8, s11 16220; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 16221; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 16222; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 16223; GFX10-WGP-NEXT: s_mov_b32 s5, s8 16224; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 16225; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 16226; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16227; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 16228; GFX10-WGP-NEXT: 
v_mov_b32_e32 v0, s4 16229; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 16230; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 16231; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16232; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16233; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16234; GFX10-WGP-NEXT: buffer_gl1_inv 16235; GFX10-WGP-NEXT: buffer_gl0_inv 16236; GFX10-WGP-NEXT: s_endpgm 16237; 16238; GFX10-CU-LABEL: flat_system_one_as_release_acquire_cmpxchg: 16239; GFX10-CU: ; %bb.0: ; %entry 16240; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 16241; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 16242; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 16243; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 16244; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 16245; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 16246; GFX10-CU-NEXT: s_mov_b32 s4, s8 16247; GFX10-CU-NEXT: s_mov_b32 s5, s9 16248; GFX10-CU-NEXT: s_mov_b32 s9, s10 16249; GFX10-CU-NEXT: s_mov_b32 s8, s11 16250; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 16251; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 16252; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 16253; GFX10-CU-NEXT: s_mov_b32 s5, s8 16254; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 16255; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 16256; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16257; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 16258; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 16259; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 16260; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 16261; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16262; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16263; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16264; GFX10-CU-NEXT: buffer_gl1_inv 16265; GFX10-CU-NEXT: buffer_gl0_inv 16266; GFX10-CU-NEXT: s_endpgm 16267; 16268; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_acquire_cmpxchg: 16269; SKIP-CACHE-INV: ; %bb.0: ; %entry 16270; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 16271; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 16272; SKIP-CACHE-INV-NEXT: 
s_load_dword s3, s[0:1], 0x2 16273; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 16274; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 16275; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 16276; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 16277; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 16278; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 16279; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 16280; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 16281; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 16282; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 16283; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 16284; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 16285; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 16286; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16287; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 16288; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 16289; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 16290; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16291; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16292; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16293; SKIP-CACHE-INV-NEXT: s_endpgm 16294; 16295; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_release_acquire_cmpxchg: 16296; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16297; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16298; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16299; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16300; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16301; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16302; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 16303; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16304; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16305; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 16306; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 16307; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16308; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 16309; 
GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16310; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 16311; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 16312; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16313; 16314; GFX90A-TGSPLIT-LABEL: flat_system_one_as_release_acquire_cmpxchg: 16315; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16316; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16317; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16318; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16319; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16320; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16321; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 16322; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16323; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16324; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 16325; GFX90A-TGSPLIT-NEXT: buffer_wbl2 16326; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16327; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 16328; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16329; GFX90A-TGSPLIT-NEXT: buffer_invl2 16330; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 16331; GFX90A-TGSPLIT-NEXT: s_endpgm 16332; 16333; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_acquire_cmpxchg: 16334; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16335; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16336; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16337; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16338; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16339; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16340; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 16341; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16342; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16343; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 16344; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 16345; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16346; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap 
v[0:1], v[2:3] offset:16 sc1 16347; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16348; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 16349; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16350; 16351; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_acquire_cmpxchg: 16352; GFX940-TGSPLIT: ; %bb.0: ; %entry 16353; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16354; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16355; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16356; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16357; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16358; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 16359; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16360; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16361; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 16362; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 16363; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16364; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 16365; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16366; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 16367; GFX940-TGSPLIT-NEXT: s_endpgm 16368; 16369; GFX11-WGP-LABEL: flat_system_one_as_release_acquire_cmpxchg: 16370; GFX11-WGP: ; %bb.0: ; %entry 16371; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16372; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16373; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16374; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16375; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 16376; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 16377; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16378; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 16379; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 16380; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 16381; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16382; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16383; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 16384; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16385; GFX11-WGP-NEXT: buffer_gl1_inv 16386; 
GFX11-WGP-NEXT: buffer_gl0_inv 16387; GFX11-WGP-NEXT: s_endpgm 16388; 16389; GFX11-CU-LABEL: flat_system_one_as_release_acquire_cmpxchg: 16390; GFX11-CU: ; %bb.0: ; %entry 16391; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16392; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16393; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16394; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16395; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 16396; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 16397; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16398; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 16399; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 16400; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 16401; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16402; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16403; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 16404; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16405; GFX11-CU-NEXT: buffer_gl1_inv 16406; GFX11-CU-NEXT: buffer_gl0_inv 16407; GFX11-CU-NEXT: s_endpgm 16408; 16409; GFX12-WGP-LABEL: flat_system_one_as_release_acquire_cmpxchg: 16410; GFX12-WGP: ; %bb.0: ; %entry 16411; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16412; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16413; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16414; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16415; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 16416; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 16417; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16418; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 16419; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 16420; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 16421; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 16422; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 16423; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 16424; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16425; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16426; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 16427; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16428; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 16429; 
GFX12-WGP-NEXT: s_endpgm 16430; 16431; GFX12-CU-LABEL: flat_system_one_as_release_acquire_cmpxchg: 16432; GFX12-CU: ; %bb.0: ; %entry 16433; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16434; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16435; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16436; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16437; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 16438; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 16439; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16440; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 16441; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 16442; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 16443; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 16444; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 16445; GFX12-CU-NEXT: s_wait_samplecnt 0x0 16446; GFX12-CU-NEXT: s_wait_loadcnt 0x0 16447; GFX12-CU-NEXT: s_wait_storecnt 0x0 16448; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 16449; GFX12-CU-NEXT: s_wait_storecnt 0x0 16450; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 16451; GFX12-CU-NEXT: s_endpgm 16452 ptr %out, i32 %in, i32 %old) { 16453entry: 16454 %gep = getelementptr i32, ptr %out, i32 4 16455 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release acquire 16456 ret void 16457} 16458 16459define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg( 16460; GFX7-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: 16461; GFX7: ; %bb.0: ; %entry 16462; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 16463; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 16464; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 16465; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 16466; GFX7-NEXT: s_mov_b64 s[10:11], 16 16467; GFX7-NEXT: s_waitcnt lgkmcnt(0) 16468; GFX7-NEXT: s_mov_b32 s4, s8 16469; GFX7-NEXT: s_mov_b32 s5, s9 16470; GFX7-NEXT: s_mov_b32 s9, s10 16471; GFX7-NEXT: s_mov_b32 s8, s11 16472; GFX7-NEXT: s_add_u32 s4, s4, s9 16473; GFX7-NEXT: s_addc_u32 s8, s5, s8 16474; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 
16475; GFX7-NEXT: s_mov_b32 s5, s8 16476; GFX7-NEXT: v_mov_b32_e32 v2, s7 16477; GFX7-NEXT: v_mov_b32_e32 v0, s6 16478; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16479; GFX7-NEXT: v_mov_b32_e32 v3, v0 16480; GFX7-NEXT: v_mov_b32_e32 v0, s4 16481; GFX7-NEXT: v_mov_b32_e32 v1, s5 16482; GFX7-NEXT: s_waitcnt vmcnt(0) 16483; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16484; GFX7-NEXT: s_waitcnt vmcnt(0) 16485; GFX7-NEXT: buffer_wbinvl1_vol 16486; GFX7-NEXT: s_endpgm 16487; 16488; GFX10-WGP-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: 16489; GFX10-WGP: ; %bb.0: ; %entry 16490; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 16491; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 16492; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 16493; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 16494; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 16495; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 16496; GFX10-WGP-NEXT: s_mov_b32 s4, s8 16497; GFX10-WGP-NEXT: s_mov_b32 s5, s9 16498; GFX10-WGP-NEXT: s_mov_b32 s9, s10 16499; GFX10-WGP-NEXT: s_mov_b32 s8, s11 16500; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 16501; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 16502; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 16503; GFX10-WGP-NEXT: s_mov_b32 s5, s8 16504; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 16505; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 16506; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16507; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 16508; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 16509; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 16510; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 16511; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16512; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16513; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16514; GFX10-WGP-NEXT: buffer_gl1_inv 16515; GFX10-WGP-NEXT: buffer_gl0_inv 16516; GFX10-WGP-NEXT: s_endpgm 16517; 16518; GFX10-CU-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: 16519; GFX10-CU: ; %bb.0: ; %entry 
16520; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 16521; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 16522; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 16523; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 16524; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 16525; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 16526; GFX10-CU-NEXT: s_mov_b32 s4, s8 16527; GFX10-CU-NEXT: s_mov_b32 s5, s9 16528; GFX10-CU-NEXT: s_mov_b32 s9, s10 16529; GFX10-CU-NEXT: s_mov_b32 s8, s11 16530; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 16531; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 16532; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 16533; GFX10-CU-NEXT: s_mov_b32 s5, s8 16534; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 16535; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 16536; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16537; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 16538; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 16539; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 16540; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 16541; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16542; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16543; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16544; GFX10-CU-NEXT: buffer_gl1_inv 16545; GFX10-CU-NEXT: buffer_gl0_inv 16546; GFX10-CU-NEXT: s_endpgm 16547; 16548; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: 16549; SKIP-CACHE-INV: ; %bb.0: ; %entry 16550; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 16551; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 16552; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 16553; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 16554; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 16555; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 16556; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 16557; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 16558; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 16559; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 16560; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 16561; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 16562; 
SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 16563; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 16564; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 16565; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 16566; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16567; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 16568; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 16569; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 16570; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16571; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16572; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16573; SKIP-CACHE-INV-NEXT: s_endpgm 16574; 16575; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: 16576; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16577; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16578; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16579; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16580; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16581; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16582; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 16583; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16584; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16585; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 16586; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 16587; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16588; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 16589; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16590; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 16591; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 16592; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16593; 16594; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: 16595; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16596; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16597; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16598; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16599; 
GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16600; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16601; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 16602; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16603; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16604; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 16605; GFX90A-TGSPLIT-NEXT: buffer_wbl2 16606; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16607; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 16608; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16609; GFX90A-TGSPLIT-NEXT: buffer_invl2 16610; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 16611; GFX90A-TGSPLIT-NEXT: s_endpgm 16612; 16613; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: 16614; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16615; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16616; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16617; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16618; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16619; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16620; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 16621; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16622; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16623; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 16624; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 16625; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16626; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 16627; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16628; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 16629; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16630; 16631; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: 16632; GFX940-TGSPLIT: ; %bb.0: ; %entry 16633; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16634; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16635; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16636; 
GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16637; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16638; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 16639; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16640; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16641; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 16642; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 16643; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16644; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 16645; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16646; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 16647; GFX940-TGSPLIT-NEXT: s_endpgm 16648; 16649; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: 16650; GFX11-WGP: ; %bb.0: ; %entry 16651; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16652; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16653; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16654; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16655; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 16656; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 16657; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16658; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 16659; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 16660; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 16661; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16662; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16663; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 16664; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16665; GFX11-WGP-NEXT: buffer_gl1_inv 16666; GFX11-WGP-NEXT: buffer_gl0_inv 16667; GFX11-WGP-NEXT: s_endpgm 16668; 16669; GFX11-CU-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: 16670; GFX11-CU: ; %bb.0: ; %entry 16671; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16672; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16673; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16674; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16675; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 16676; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 16677; GFX11-CU-NEXT: ; kill: def 
$vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16678; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 16679; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 16680; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 16681; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16682; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16683; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 16684; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16685; GFX11-CU-NEXT: buffer_gl1_inv 16686; GFX11-CU-NEXT: buffer_gl0_inv 16687; GFX11-CU-NEXT: s_endpgm 16688; 16689; GFX12-WGP-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: 16690; GFX12-WGP: ; %bb.0: ; %entry 16691; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16692; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16693; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16694; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16695; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 16696; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 16697; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16698; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 16699; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 16700; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 16701; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 16702; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 16703; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 16704; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16705; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16706; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 16707; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16708; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 16709; GFX12-WGP-NEXT: s_endpgm 16710; 16711; GFX12-CU-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: 16712; GFX12-CU: ; %bb.0: ; %entry 16713; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16714; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16715; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16716; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16717; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 16718; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 16719; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed 
$exec 16720; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 16721; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 16722; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 16723; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 16724; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 16725; GFX12-CU-NEXT: s_wait_samplecnt 0x0 16726; GFX12-CU-NEXT: s_wait_loadcnt 0x0 16727; GFX12-CU-NEXT: s_wait_storecnt 0x0 16728; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 16729; GFX12-CU-NEXT: s_wait_storecnt 0x0 16730; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 16731; GFX12-CU-NEXT: s_endpgm 16732 ptr %out, i32 %in, i32 %old) { 16733entry: 16734 %gep = getelementptr i32, ptr %out, i32 4 16735 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire 16736 ret void 16737} 16738 16739define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg( 16740; GFX7-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: 16741; GFX7: ; %bb.0: ; %entry 16742; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 16743; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 16744; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 16745; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 16746; GFX7-NEXT: s_mov_b64 s[10:11], 16 16747; GFX7-NEXT: s_waitcnt lgkmcnt(0) 16748; GFX7-NEXT: s_mov_b32 s4, s8 16749; GFX7-NEXT: s_mov_b32 s5, s9 16750; GFX7-NEXT: s_mov_b32 s9, s10 16751; GFX7-NEXT: s_mov_b32 s8, s11 16752; GFX7-NEXT: s_add_u32 s4, s4, s9 16753; GFX7-NEXT: s_addc_u32 s8, s5, s8 16754; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 16755; GFX7-NEXT: s_mov_b32 s5, s8 16756; GFX7-NEXT: v_mov_b32_e32 v2, s7 16757; GFX7-NEXT: v_mov_b32_e32 v0, s6 16758; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16759; GFX7-NEXT: v_mov_b32_e32 v3, v0 16760; GFX7-NEXT: v_mov_b32_e32 v0, s4 16761; GFX7-NEXT: v_mov_b32_e32 v1, s5 16762; GFX7-NEXT: s_waitcnt vmcnt(0) 16763; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16764; GFX7-NEXT: s_waitcnt vmcnt(0) 16765; GFX7-NEXT: buffer_wbinvl1_vol 16766; GFX7-NEXT: 
s_endpgm 16767; 16768; GFX10-WGP-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: 16769; GFX10-WGP: ; %bb.0: ; %entry 16770; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 16771; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 16772; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 16773; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 16774; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 16775; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 16776; GFX10-WGP-NEXT: s_mov_b32 s4, s8 16777; GFX10-WGP-NEXT: s_mov_b32 s5, s9 16778; GFX10-WGP-NEXT: s_mov_b32 s9, s10 16779; GFX10-WGP-NEXT: s_mov_b32 s8, s11 16780; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 16781; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 16782; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 16783; GFX10-WGP-NEXT: s_mov_b32 s5, s8 16784; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 16785; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 16786; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16787; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 16788; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 16789; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 16790; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 16791; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16792; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16793; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16794; GFX10-WGP-NEXT: buffer_gl1_inv 16795; GFX10-WGP-NEXT: buffer_gl0_inv 16796; GFX10-WGP-NEXT: s_endpgm 16797; 16798; GFX10-CU-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: 16799; GFX10-CU: ; %bb.0: ; %entry 16800; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 16801; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 16802; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 16803; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 16804; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 16805; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 16806; GFX10-CU-NEXT: s_mov_b32 s4, s8 16807; GFX10-CU-NEXT: s_mov_b32 s5, s9 16808; GFX10-CU-NEXT: s_mov_b32 s9, s10 16809; GFX10-CU-NEXT: s_mov_b32 s8, s11 16810; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 
16811; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 16812; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 16813; GFX10-CU-NEXT: s_mov_b32 s5, s8 16814; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 16815; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 16816; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16817; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 16818; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 16819; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 16820; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 16821; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16822; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16823; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 16824; GFX10-CU-NEXT: buffer_gl1_inv 16825; GFX10-CU-NEXT: buffer_gl0_inv 16826; GFX10-CU-NEXT: s_endpgm 16827; 16828; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: 16829; SKIP-CACHE-INV: ; %bb.0: ; %entry 16830; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 16831; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 16832; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 16833; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 16834; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 16835; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 16836; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 16837; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 16838; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 16839; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 16840; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 16841; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 16842; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 16843; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 16844; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 16845; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 16846; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16847; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 16848; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 16849; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 16850; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16851; 
SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 16852; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 16853; SKIP-CACHE-INV-NEXT: s_endpgm 16854; 16855; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: 16856; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 16857; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16858; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16859; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16860; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16861; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16862; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 16863; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16864; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16865; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 16866; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 16867; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16868; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 16869; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16870; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 16871; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 16872; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 16873; 16874; GFX90A-TGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: 16875; GFX90A-TGSPLIT: ; %bb.0: ; %entry 16876; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 16877; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 16878; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 16879; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16880; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 16881; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 16882; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16883; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16884; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 16885; GFX90A-TGSPLIT-NEXT: buffer_wbl2 16886; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16887; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 
offset:16 16888; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16889; GFX90A-TGSPLIT-NEXT: buffer_invl2 16890; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 16891; GFX90A-TGSPLIT-NEXT: s_endpgm 16892; 16893; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: 16894; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 16895; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16896; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16897; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16898; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16899; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16900; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 16901; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16902; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16903; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 16904; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 16905; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16906; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 16907; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 16908; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 16909; GFX940-NOTTGSPLIT-NEXT: s_endpgm 16910; 16911; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: 16912; GFX940-TGSPLIT: ; %bb.0: ; %entry 16913; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 16914; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 16915; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 16916; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 16917; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 16918; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 16919; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16920; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 16921; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 16922; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 16923; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16924; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 16925; 
GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 16926; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 16927; GFX940-TGSPLIT-NEXT: s_endpgm 16928; 16929; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: 16930; GFX11-WGP: ; %bb.0: ; %entry 16931; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16932; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16933; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16934; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 16935; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 16936; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 16937; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16938; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 16939; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 16940; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 16941; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 16942; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16943; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 16944; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 16945; GFX11-WGP-NEXT: buffer_gl1_inv 16946; GFX11-WGP-NEXT: buffer_gl0_inv 16947; GFX11-WGP-NEXT: s_endpgm 16948; 16949; GFX11-CU-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: 16950; GFX11-CU: ; %bb.0: ; %entry 16951; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16952; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16953; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16954; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 16955; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 16956; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 16957; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16958; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 16959; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 16960; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 16961; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 16962; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16963; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 16964; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 16965; GFX11-CU-NEXT: buffer_gl1_inv 16966; GFX11-CU-NEXT: buffer_gl0_inv 16967; GFX11-CU-NEXT: s_endpgm 16968; 16969; 
GFX12-WGP-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: 16970; GFX12-WGP: ; %bb.0: ; %entry 16971; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16972; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 16973; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 16974; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 16975; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 16976; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 16977; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 16978; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 16979; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 16980; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 16981; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 16982; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 16983; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 16984; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 16985; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16986; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 16987; GFX12-WGP-NEXT: s_wait_storecnt 0x0 16988; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 16989; GFX12-WGP-NEXT: s_endpgm 16990; 16991; GFX12-CU-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: 16992; GFX12-CU: ; %bb.0: ; %entry 16993; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 16994; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 16995; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 16996; GFX12-CU-NEXT: s_wait_kmcnt 0x0 16997; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 16998; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 16999; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17000; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 17001; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 17002; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 17003; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 17004; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 17005; GFX12-CU-NEXT: s_wait_samplecnt 0x0 17006; GFX12-CU-NEXT: s_wait_loadcnt 0x0 17007; GFX12-CU-NEXT: s_wait_storecnt 0x0 17008; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 17009; GFX12-CU-NEXT: s_wait_storecnt 0x0 17010; GFX12-CU-NEXT: 
global_inv scope:SCOPE_SYS 17011; GFX12-CU-NEXT: s_endpgm 17012 ptr %out, i32 %in, i32 %old) { 17013entry: 17014 %gep = getelementptr i32, ptr %out, i32 4 17015 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire 17016 ret void 17017} 17018 17019define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg( 17020; GFX7-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: 17021; GFX7: ; %bb.0: ; %entry 17022; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 17023; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17024; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 17025; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 17026; GFX7-NEXT: s_mov_b64 s[10:11], 16 17027; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17028; GFX7-NEXT: s_mov_b32 s4, s8 17029; GFX7-NEXT: s_mov_b32 s5, s9 17030; GFX7-NEXT: s_mov_b32 s9, s10 17031; GFX7-NEXT: s_mov_b32 s8, s11 17032; GFX7-NEXT: s_add_u32 s4, s4, s9 17033; GFX7-NEXT: s_addc_u32 s8, s5, s8 17034; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17035; GFX7-NEXT: s_mov_b32 s5, s8 17036; GFX7-NEXT: v_mov_b32_e32 v2, s7 17037; GFX7-NEXT: v_mov_b32_e32 v0, s6 17038; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17039; GFX7-NEXT: v_mov_b32_e32 v3, v0 17040; GFX7-NEXT: v_mov_b32_e32 v0, s4 17041; GFX7-NEXT: v_mov_b32_e32 v1, s5 17042; GFX7-NEXT: s_waitcnt vmcnt(0) 17043; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17044; GFX7-NEXT: s_waitcnt vmcnt(0) 17045; GFX7-NEXT: buffer_wbinvl1_vol 17046; GFX7-NEXT: s_endpgm 17047; 17048; GFX10-WGP-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: 17049; GFX10-WGP: ; %bb.0: ; %entry 17050; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 17051; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17052; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 17053; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 17054; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 17055; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17056; GFX10-WGP-NEXT: s_mov_b32 s4, s8 17057; GFX10-WGP-NEXT: s_mov_b32 
s5, s9 17058; GFX10-WGP-NEXT: s_mov_b32 s9, s10 17059; GFX10-WGP-NEXT: s_mov_b32 s8, s11 17060; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 17061; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 17062; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17063; GFX10-WGP-NEXT: s_mov_b32 s5, s8 17064; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 17065; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 17066; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17067; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 17068; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 17069; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 17070; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17071; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17072; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17073; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17074; GFX10-WGP-NEXT: buffer_gl1_inv 17075; GFX10-WGP-NEXT: buffer_gl0_inv 17076; GFX10-WGP-NEXT: s_endpgm 17077; 17078; GFX10-CU-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: 17079; GFX10-CU: ; %bb.0: ; %entry 17080; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 17081; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17082; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 17083; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 17084; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 17085; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17086; GFX10-CU-NEXT: s_mov_b32 s4, s8 17087; GFX10-CU-NEXT: s_mov_b32 s5, s9 17088; GFX10-CU-NEXT: s_mov_b32 s9, s10 17089; GFX10-CU-NEXT: s_mov_b32 s8, s11 17090; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 17091; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 17092; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17093; GFX10-CU-NEXT: s_mov_b32 s5, s8 17094; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 17095; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 17096; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17097; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 17098; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 17099; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 17100; GFX10-CU-NEXT: s_waitcnt 
vmcnt(0) 17101; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17102; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17103; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17104; GFX10-CU-NEXT: buffer_gl1_inv 17105; GFX10-CU-NEXT: buffer_gl0_inv 17106; GFX10-CU-NEXT: s_endpgm 17107; 17108; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: 17109; SKIP-CACHE-INV: ; %bb.0: ; %entry 17110; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 17111; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 17112; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 17113; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 17114; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 17115; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17116; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 17117; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 17118; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 17119; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 17120; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 17121; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 17122; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 17123; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 17124; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 17125; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 17126; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17127; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 17128; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 17129; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 17130; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17131; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17132; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17133; SKIP-CACHE-INV-NEXT: s_endpgm 17134; 17135; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: 17136; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17137; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17138; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17139; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17140; GFX90A-NOTTGSPLIT-NEXT: 
s_waitcnt lgkmcnt(0) 17141; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17142; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 17143; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17144; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 17145; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 17146; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 17147; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17148; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 17149; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17150; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 17151; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 17152; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17153; 17154; GFX90A-TGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: 17155; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17156; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17157; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17158; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17159; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17160; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17161; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 17162; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17163; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 17164; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 17165; GFX90A-TGSPLIT-NEXT: buffer_wbl2 17166; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17167; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 17168; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17169; GFX90A-TGSPLIT-NEXT: buffer_invl2 17170; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 17171; GFX90A-TGSPLIT-NEXT: s_endpgm 17172; 17173; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: 17174; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 17175; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17176; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17177; GFX940-NOTTGSPLIT-NEXT: s_load_dword 
s2, s[4:5], 0xc 17178; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17179; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17180; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 17181; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17182; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 17183; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 17184; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 17185; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17186; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 17187; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17188; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 17189; GFX940-NOTTGSPLIT-NEXT: s_endpgm 17190; 17191; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: 17192; GFX940-TGSPLIT: ; %bb.0: ; %entry 17193; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17194; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17195; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17196; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17197; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17198; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 17199; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17200; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 17201; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 17202; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 17203; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17204; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 17205; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17206; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 17207; GFX940-TGSPLIT-NEXT: s_endpgm 17208; 17209; GFX11-WGP-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: 17210; GFX11-WGP: ; %bb.0: ; %entry 17211; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17212; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17213; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17214; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 17215; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 
s3 17216; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 17217; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17218; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 17219; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 17220; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 17221; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17222; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17223; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 17224; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17225; GFX11-WGP-NEXT: buffer_gl1_inv 17226; GFX11-WGP-NEXT: buffer_gl0_inv 17227; GFX11-WGP-NEXT: s_endpgm 17228; 17229; GFX11-CU-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: 17230; GFX11-CU: ; %bb.0: ; %entry 17231; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17232; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17233; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17234; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 17235; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 17236; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 17237; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17238; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 17239; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 17240; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 17241; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 17242; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 17243; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 17244; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 17245; GFX11-CU-NEXT: buffer_gl1_inv 17246; GFX11-CU-NEXT: buffer_gl0_inv 17247; GFX11-CU-NEXT: s_endpgm 17248; 17249; GFX12-WGP-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: 17250; GFX12-WGP: ; %bb.0: ; %entry 17251; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17252; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17253; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17254; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 17255; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 17256; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 17257; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17258; 
GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 17259; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 17260; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 17261; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 17262; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 17263; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 17264; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 17265; GFX12-WGP-NEXT: s_wait_storecnt 0x0 17266; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 17267; GFX12-WGP-NEXT: s_wait_storecnt 0x0 17268; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 17269; GFX12-WGP-NEXT: s_endpgm 17270; 17271; GFX12-CU-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: 17272; GFX12-CU: ; %bb.0: ; %entry 17273; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17274; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17275; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17276; GFX12-CU-NEXT: s_wait_kmcnt 0x0 17277; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 17278; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 17279; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17280; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 17281; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 17282; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 17283; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 17284; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 17285; GFX12-CU-NEXT: s_wait_samplecnt 0x0 17286; GFX12-CU-NEXT: s_wait_loadcnt 0x0 17287; GFX12-CU-NEXT: s_wait_storecnt 0x0 17288; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 17289; GFX12-CU-NEXT: s_wait_storecnt 0x0 17290; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 17291; GFX12-CU-NEXT: s_endpgm 17292 ptr %out, i32 %in, i32 %old) { 17293entry: 17294 %gep = getelementptr i32, ptr %out, i32 4 17295 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst 17296 ret void 17297} 17298 17299define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg( 17300; GFX7-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: 17301; GFX7: ; %bb.0: ; %entry 17302; GFX7-NEXT: s_mov_b64 
s[4:5], s[8:9] 17303; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17304; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 17305; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 17306; GFX7-NEXT: s_mov_b64 s[10:11], 16 17307; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17308; GFX7-NEXT: s_mov_b32 s4, s8 17309; GFX7-NEXT: s_mov_b32 s5, s9 17310; GFX7-NEXT: s_mov_b32 s9, s10 17311; GFX7-NEXT: s_mov_b32 s8, s11 17312; GFX7-NEXT: s_add_u32 s4, s4, s9 17313; GFX7-NEXT: s_addc_u32 s8, s5, s8 17314; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17315; GFX7-NEXT: s_mov_b32 s5, s8 17316; GFX7-NEXT: v_mov_b32_e32 v2, s7 17317; GFX7-NEXT: v_mov_b32_e32 v0, s6 17318; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17319; GFX7-NEXT: v_mov_b32_e32 v3, v0 17320; GFX7-NEXT: v_mov_b32_e32 v0, s4 17321; GFX7-NEXT: v_mov_b32_e32 v1, s5 17322; GFX7-NEXT: s_waitcnt vmcnt(0) 17323; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17324; GFX7-NEXT: s_waitcnt vmcnt(0) 17325; GFX7-NEXT: buffer_wbinvl1_vol 17326; GFX7-NEXT: s_endpgm 17327; 17328; GFX10-WGP-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: 17329; GFX10-WGP: ; %bb.0: ; %entry 17330; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 17331; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17332; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 17333; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 17334; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 17335; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17336; GFX10-WGP-NEXT: s_mov_b32 s4, s8 17337; GFX10-WGP-NEXT: s_mov_b32 s5, s9 17338; GFX10-WGP-NEXT: s_mov_b32 s9, s10 17339; GFX10-WGP-NEXT: s_mov_b32 s8, s11 17340; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 17341; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 17342; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17343; GFX10-WGP-NEXT: s_mov_b32 s5, s8 17344; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 17345; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 17346; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17347; 
GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 17348; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 17349; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 17350; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17351; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17352; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17353; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17354; GFX10-WGP-NEXT: buffer_gl1_inv 17355; GFX10-WGP-NEXT: buffer_gl0_inv 17356; GFX10-WGP-NEXT: s_endpgm 17357; 17358; GFX10-CU-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: 17359; GFX10-CU: ; %bb.0: ; %entry 17360; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 17361; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17362; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 17363; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 17364; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 17365; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17366; GFX10-CU-NEXT: s_mov_b32 s4, s8 17367; GFX10-CU-NEXT: s_mov_b32 s5, s9 17368; GFX10-CU-NEXT: s_mov_b32 s9, s10 17369; GFX10-CU-NEXT: s_mov_b32 s8, s11 17370; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 17371; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 17372; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17373; GFX10-CU-NEXT: s_mov_b32 s5, s8 17374; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 17375; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 17376; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17377; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 17378; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 17379; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 17380; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17381; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17382; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17383; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17384; GFX10-CU-NEXT: buffer_gl1_inv 17385; GFX10-CU-NEXT: buffer_gl0_inv 17386; GFX10-CU-NEXT: s_endpgm 17387; 17388; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: 17389; SKIP-CACHE-INV: ; %bb.0: ; %entry 17390; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 17391; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[4:5], s[0:1], 0x0 17392; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 17393; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 17394; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 17395; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17396; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 17397; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 17398; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 17399; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 17400; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 17401; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 17402; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 17403; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 17404; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 17405; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 17406; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17407; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 17408; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 17409; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 17410; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17411; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17412; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17413; SKIP-CACHE-INV-NEXT: s_endpgm 17414; 17415; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: 17416; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17417; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17418; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17419; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17420; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17421; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17422; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 17423; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17424; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 17425; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 17426; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 17427; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17428; GFX90A-NOTTGSPLIT-NEXT: 
flat_atomic_cmpswap v[0:1], v[2:3] offset:16 17429; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17430; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 17431; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 17432; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17433; 17434; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: 17435; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17436; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17437; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17438; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17439; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17440; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17441; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 17442; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17443; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 17444; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 17445; GFX90A-TGSPLIT-NEXT: buffer_wbl2 17446; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17447; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 17448; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17449; GFX90A-TGSPLIT-NEXT: buffer_invl2 17450; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 17451; GFX90A-TGSPLIT-NEXT: s_endpgm 17452; 17453; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: 17454; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 17455; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17456; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17457; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17458; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17459; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17460; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 17461; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17462; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 17463; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 17464; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 17465; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 
17466; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 17467; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17468; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 17469; GFX940-NOTTGSPLIT-NEXT: s_endpgm 17470; 17471; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: 17472; GFX940-TGSPLIT: ; %bb.0: ; %entry 17473; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17474; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17475; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17476; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17477; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17478; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 17479; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17480; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 17481; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 17482; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 17483; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17484; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 17485; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17486; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 17487; GFX940-TGSPLIT-NEXT: s_endpgm 17488; 17489; GFX11-WGP-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: 17490; GFX11-WGP: ; %bb.0: ; %entry 17491; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17492; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17493; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17494; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 17495; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 17496; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 17497; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17498; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 17499; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 17500; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 17501; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17502; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17503; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 17504; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17505; 
GFX11-WGP-NEXT: buffer_gl1_inv 17506; GFX11-WGP-NEXT: buffer_gl0_inv 17507; GFX11-WGP-NEXT: s_endpgm 17508; 17509; GFX11-CU-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: 17510; GFX11-CU: ; %bb.0: ; %entry 17511; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17512; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17513; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17514; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 17515; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 17516; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 17517; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17518; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 17519; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 17520; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 17521; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 17522; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 17523; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 17524; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 17525; GFX11-CU-NEXT: buffer_gl1_inv 17526; GFX11-CU-NEXT: buffer_gl0_inv 17527; GFX11-CU-NEXT: s_endpgm 17528; 17529; GFX12-WGP-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: 17530; GFX12-WGP: ; %bb.0: ; %entry 17531; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17532; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17533; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17534; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 17535; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 17536; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 17537; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17538; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 17539; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 17540; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 17541; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 17542; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 17543; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 17544; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 17545; GFX12-WGP-NEXT: s_wait_storecnt 0x0 17546; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 17547; GFX12-WGP-NEXT: s_wait_storecnt 0x0 17548; 
GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 17549; GFX12-WGP-NEXT: s_endpgm 17550; 17551; GFX12-CU-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: 17552; GFX12-CU: ; %bb.0: ; %entry 17553; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17554; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17555; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17556; GFX12-CU-NEXT: s_wait_kmcnt 0x0 17557; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 17558; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 17559; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17560; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 17561; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 17562; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 17563; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 17564; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 17565; GFX12-CU-NEXT: s_wait_samplecnt 0x0 17566; GFX12-CU-NEXT: s_wait_loadcnt 0x0 17567; GFX12-CU-NEXT: s_wait_storecnt 0x0 17568; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 17569; GFX12-CU-NEXT: s_wait_storecnt 0x0 17570; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 17571; GFX12-CU-NEXT: s_endpgm 17572 ptr %out, i32 %in, i32 %old) { 17573entry: 17574 %gep = getelementptr i32, ptr %out, i32 4 17575 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst 17576 ret void 17577} 17578 17579define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg( 17580; GFX7-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: 17581; GFX7: ; %bb.0: ; %entry 17582; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 17583; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17584; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 17585; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 17586; GFX7-NEXT: s_mov_b64 s[10:11], 16 17587; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17588; GFX7-NEXT: s_mov_b32 s4, s8 17589; GFX7-NEXT: s_mov_b32 s5, s9 17590; GFX7-NEXT: s_mov_b32 s9, s10 17591; GFX7-NEXT: s_mov_b32 s8, s11 17592; GFX7-NEXT: s_add_u32 s4, s4, s9 17593; GFX7-NEXT: s_addc_u32 s8, s5, s8 17594; GFX7-NEXT: ; 
kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17595; GFX7-NEXT: s_mov_b32 s5, s8 17596; GFX7-NEXT: v_mov_b32_e32 v2, s7 17597; GFX7-NEXT: v_mov_b32_e32 v0, s6 17598; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17599; GFX7-NEXT: v_mov_b32_e32 v3, v0 17600; GFX7-NEXT: v_mov_b32_e32 v0, s4 17601; GFX7-NEXT: v_mov_b32_e32 v1, s5 17602; GFX7-NEXT: s_waitcnt vmcnt(0) 17603; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17604; GFX7-NEXT: s_waitcnt vmcnt(0) 17605; GFX7-NEXT: buffer_wbinvl1_vol 17606; GFX7-NEXT: s_endpgm 17607; 17608; GFX10-WGP-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: 17609; GFX10-WGP: ; %bb.0: ; %entry 17610; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 17611; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17612; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 17613; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 17614; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 17615; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17616; GFX10-WGP-NEXT: s_mov_b32 s4, s8 17617; GFX10-WGP-NEXT: s_mov_b32 s5, s9 17618; GFX10-WGP-NEXT: s_mov_b32 s9, s10 17619; GFX10-WGP-NEXT: s_mov_b32 s8, s11 17620; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 17621; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 17622; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17623; GFX10-WGP-NEXT: s_mov_b32 s5, s8 17624; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 17625; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 17626; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17627; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 17628; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 17629; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 17630; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17631; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17632; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17633; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17634; GFX10-WGP-NEXT: buffer_gl1_inv 17635; GFX10-WGP-NEXT: buffer_gl0_inv 17636; GFX10-WGP-NEXT: s_endpgm 17637; 17638; GFX10-CU-LABEL: 
flat_system_one_as_release_seq_cst_cmpxchg: 17639; GFX10-CU: ; %bb.0: ; %entry 17640; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 17641; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17642; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 17643; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 17644; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 17645; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17646; GFX10-CU-NEXT: s_mov_b32 s4, s8 17647; GFX10-CU-NEXT: s_mov_b32 s5, s9 17648; GFX10-CU-NEXT: s_mov_b32 s9, s10 17649; GFX10-CU-NEXT: s_mov_b32 s8, s11 17650; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 17651; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 17652; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17653; GFX10-CU-NEXT: s_mov_b32 s5, s8 17654; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 17655; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 17656; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17657; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 17658; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 17659; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 17660; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17661; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17662; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17663; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17664; GFX10-CU-NEXT: buffer_gl1_inv 17665; GFX10-CU-NEXT: buffer_gl0_inv 17666; GFX10-CU-NEXT: s_endpgm 17667; 17668; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: 17669; SKIP-CACHE-INV: ; %bb.0: ; %entry 17670; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 17671; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 17672; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 17673; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 17674; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 17675; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17676; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 17677; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 17678; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 17679; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 17680; SKIP-CACHE-INV-NEXT: s_add_u32 s0, 
s0, s5 17681; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 17682; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 17683; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 17684; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 17685; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 17686; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17687; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 17688; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 17689; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 17690; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17691; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17692; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17693; SKIP-CACHE-INV-NEXT: s_endpgm 17694; 17695; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: 17696; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17697; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17698; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17699; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17700; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17701; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17702; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 17703; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17704; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 17705; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 17706; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 17707; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17708; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 17709; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17710; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 17711; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 17712; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17713; 17714; GFX90A-TGSPLIT-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: 17715; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17716; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17717; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 
17718; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17719; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17720; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17721; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 17722; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17723; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 17724; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 17725; GFX90A-TGSPLIT-NEXT: buffer_wbl2 17726; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17727; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 17728; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17729; GFX90A-TGSPLIT-NEXT: buffer_invl2 17730; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 17731; GFX90A-TGSPLIT-NEXT: s_endpgm 17732; 17733; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: 17734; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 17735; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17736; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17737; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17738; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17739; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17740; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 17741; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17742; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 17743; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 17744; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 17745; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17746; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 17747; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17748; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 17749; GFX940-NOTTGSPLIT-NEXT: s_endpgm 17750; 17751; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: 17752; GFX940-TGSPLIT: ; %bb.0: ; %entry 17753; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 17754; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 17755; 
GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 17756; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17757; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 17758; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 17759; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17760; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 17761; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 17762; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 17763; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17764; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 17765; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 17766; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 17767; GFX940-TGSPLIT-NEXT: s_endpgm 17768; 17769; GFX11-WGP-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: 17770; GFX11-WGP: ; %bb.0: ; %entry 17771; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17772; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17773; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17774; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 17775; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 17776; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 17777; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17778; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 17779; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 17780; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 17781; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 17782; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17783; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 17784; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17785; GFX11-WGP-NEXT: buffer_gl1_inv 17786; GFX11-WGP-NEXT: buffer_gl0_inv 17787; GFX11-WGP-NEXT: s_endpgm 17788; 17789; GFX11-CU-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: 17790; GFX11-CU: ; %bb.0: ; %entry 17791; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17792; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17793; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17794; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 17795; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 17796; GFX11-CU-NEXT: 
v_mov_b32_e32 v0, s2 17797; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17798; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 17799; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 17800; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 17801; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 17802; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 17803; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 17804; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 17805; GFX11-CU-NEXT: buffer_gl1_inv 17806; GFX11-CU-NEXT: buffer_gl0_inv 17807; GFX11-CU-NEXT: s_endpgm 17808; 17809; GFX12-WGP-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: 17810; GFX12-WGP: ; %bb.0: ; %entry 17811; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17812; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 17813; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 17814; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 17815; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 17816; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 17817; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17818; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 17819; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 17820; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 17821; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 17822; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 17823; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 17824; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 17825; GFX12-WGP-NEXT: s_wait_storecnt 0x0 17826; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 17827; GFX12-WGP-NEXT: s_wait_storecnt 0x0 17828; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 17829; GFX12-WGP-NEXT: s_endpgm 17830; 17831; GFX12-CU-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: 17832; GFX12-CU: ; %bb.0: ; %entry 17833; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 17834; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 17835; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 17836; GFX12-CU-NEXT: s_wait_kmcnt 0x0 17837; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 17838; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 17839; GFX12-CU-NEXT: ; 
kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17840; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 17841; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 17842; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 17843; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 17844; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 17845; GFX12-CU-NEXT: s_wait_samplecnt 0x0 17846; GFX12-CU-NEXT: s_wait_loadcnt 0x0 17847; GFX12-CU-NEXT: s_wait_storecnt 0x0 17848; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 17849; GFX12-CU-NEXT: s_wait_storecnt 0x0 17850; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 17851; GFX12-CU-NEXT: s_endpgm 17852 ptr %out, i32 %in, i32 %old) { 17853entry: 17854 %gep = getelementptr i32, ptr %out, i32 4 17855 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst 17856 ret void 17857} 17858 17859define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg( 17860; GFX7-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: 17861; GFX7: ; %bb.0: ; %entry 17862; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 17863; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17864; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 17865; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 17866; GFX7-NEXT: s_mov_b64 s[10:11], 16 17867; GFX7-NEXT: s_waitcnt lgkmcnt(0) 17868; GFX7-NEXT: s_mov_b32 s4, s8 17869; GFX7-NEXT: s_mov_b32 s5, s9 17870; GFX7-NEXT: s_mov_b32 s9, s10 17871; GFX7-NEXT: s_mov_b32 s8, s11 17872; GFX7-NEXT: s_add_u32 s4, s4, s9 17873; GFX7-NEXT: s_addc_u32 s8, s5, s8 17874; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17875; GFX7-NEXT: s_mov_b32 s5, s8 17876; GFX7-NEXT: v_mov_b32_e32 v2, s7 17877; GFX7-NEXT: v_mov_b32_e32 v0, s6 17878; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17879; GFX7-NEXT: v_mov_b32_e32 v3, v0 17880; GFX7-NEXT: v_mov_b32_e32 v0, s4 17881; GFX7-NEXT: v_mov_b32_e32 v1, s5 17882; GFX7-NEXT: s_waitcnt vmcnt(0) 17883; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17884; GFX7-NEXT: s_waitcnt vmcnt(0) 
17885; GFX7-NEXT: buffer_wbinvl1_vol 17886; GFX7-NEXT: s_endpgm 17887; 17888; GFX10-WGP-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: 17889; GFX10-WGP: ; %bb.0: ; %entry 17890; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 17891; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17892; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 17893; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 17894; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 17895; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 17896; GFX10-WGP-NEXT: s_mov_b32 s4, s8 17897; GFX10-WGP-NEXT: s_mov_b32 s5, s9 17898; GFX10-WGP-NEXT: s_mov_b32 s9, s10 17899; GFX10-WGP-NEXT: s_mov_b32 s8, s11 17900; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 17901; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 17902; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17903; GFX10-WGP-NEXT: s_mov_b32 s5, s8 17904; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 17905; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 17906; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17907; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 17908; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 17909; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 17910; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 17911; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17912; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17913; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 17914; GFX10-WGP-NEXT: buffer_gl1_inv 17915; GFX10-WGP-NEXT: buffer_gl0_inv 17916; GFX10-WGP-NEXT: s_endpgm 17917; 17918; GFX10-CU-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: 17919; GFX10-CU: ; %bb.0: ; %entry 17920; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 17921; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 17922; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 17923; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 17924; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 17925; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 17926; GFX10-CU-NEXT: s_mov_b32 s4, s8 17927; GFX10-CU-NEXT: s_mov_b32 s5, s9 17928; GFX10-CU-NEXT: s_mov_b32 s9, s10 17929; GFX10-CU-NEXT: 
s_mov_b32 s8, s11 17930; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 17931; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 17932; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 17933; GFX10-CU-NEXT: s_mov_b32 s5, s8 17934; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 17935; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 17936; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17937; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 17938; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 17939; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 17940; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 17941; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17942; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17943; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 17944; GFX10-CU-NEXT: buffer_gl1_inv 17945; GFX10-CU-NEXT: buffer_gl0_inv 17946; GFX10-CU-NEXT: s_endpgm 17947; 17948; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: 17949; SKIP-CACHE-INV: ; %bb.0: ; %entry 17950; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 17951; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 17952; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 17953; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 17954; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 17955; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 17956; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 17957; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 17958; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 17959; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 17960; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 17961; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 17962; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 17963; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 17964; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 17965; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 17966; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17967; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 17968; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 17969; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, 
s1 17970; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17971; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 17972; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 17973; SKIP-CACHE-INV-NEXT: s_endpgm 17974; 17975; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: 17976; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 17977; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17978; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17979; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17980; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 17981; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 17982; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 17983; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 17984; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 17985; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 17986; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 17987; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17988; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 17989; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 17990; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 17991; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 17992; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 17993; 17994; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: 17995; GFX90A-TGSPLIT: ; %bb.0: ; %entry 17996; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 17997; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 17998; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 17999; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18000; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18001; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 18002; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18003; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 18004; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 18005; GFX90A-TGSPLIT-NEXT: buffer_wbl2 18006; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 
18007; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 18008; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18009; GFX90A-TGSPLIT-NEXT: buffer_invl2 18010; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 18011; GFX90A-TGSPLIT-NEXT: s_endpgm 18012; 18013; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: 18014; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 18015; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18016; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18017; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18018; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18019; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18020; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 18021; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18022; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 18023; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 18024; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 18025; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18026; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 18027; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18028; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 18029; GFX940-NOTTGSPLIT-NEXT: s_endpgm 18030; 18031; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: 18032; GFX940-TGSPLIT: ; %bb.0: ; %entry 18033; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18034; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18035; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18036; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18037; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18038; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 18039; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18040; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 18041; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 18042; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 18043; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18044; 
GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 18045; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18046; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 18047; GFX940-TGSPLIT-NEXT: s_endpgm 18048; 18049; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: 18050; GFX11-WGP: ; %bb.0: ; %entry 18051; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18052; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18053; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18054; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 18055; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 18056; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 18057; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18058; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 18059; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 18060; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 18061; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 18062; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 18063; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 18064; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 18065; GFX11-WGP-NEXT: buffer_gl1_inv 18066; GFX11-WGP-NEXT: buffer_gl0_inv 18067; GFX11-WGP-NEXT: s_endpgm 18068; 18069; GFX11-CU-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: 18070; GFX11-CU: ; %bb.0: ; %entry 18071; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18072; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18073; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18074; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 18075; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 18076; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 18077; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18078; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 18079; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 18080; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 18081; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18082; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 18083; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 18084; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 18085; GFX11-CU-NEXT: buffer_gl1_inv 18086; 
GFX11-CU-NEXT: buffer_gl0_inv 18087; GFX11-CU-NEXT: s_endpgm 18088; 18089; GFX12-WGP-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: 18090; GFX12-WGP: ; %bb.0: ; %entry 18091; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18092; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18093; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18094; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 18095; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 18096; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 18097; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18098; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 18099; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 18100; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 18101; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 18102; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 18103; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 18104; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18105; GFX12-WGP-NEXT: s_wait_storecnt 0x0 18106; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 18107; GFX12-WGP-NEXT: s_wait_storecnt 0x0 18108; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 18109; GFX12-WGP-NEXT: s_endpgm 18110; 18111; GFX12-CU-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: 18112; GFX12-CU: ; %bb.0: ; %entry 18113; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18114; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18115; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18116; GFX12-CU-NEXT: s_wait_kmcnt 0x0 18117; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 18118; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 18119; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18120; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 18121; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 18122; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 18123; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 18124; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 18125; GFX12-CU-NEXT: s_wait_samplecnt 0x0 18126; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18127; GFX12-CU-NEXT: s_wait_storecnt 0x0 18128; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 
scope:SCOPE_SYS 18129; GFX12-CU-NEXT: s_wait_storecnt 0x0 18130; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 18131; GFX12-CU-NEXT: s_endpgm 18132 ptr %out, i32 %in, i32 %old) { 18133entry: 18134 %gep = getelementptr i32, ptr %out, i32 4 18135 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst 18136 ret void 18137} 18138 18139define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg( 18140; GFX7-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: 18141; GFX7: ; %bb.0: ; %entry 18142; GFX7-NEXT: s_mov_b64 s[4:5], s[8:9] 18143; GFX7-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 18144; GFX7-NEXT: s_load_dword s7, s[4:5], 0x2 18145; GFX7-NEXT: s_load_dword s6, s[4:5], 0x3 18146; GFX7-NEXT: s_mov_b64 s[10:11], 16 18147; GFX7-NEXT: s_waitcnt lgkmcnt(0) 18148; GFX7-NEXT: s_mov_b32 s4, s8 18149; GFX7-NEXT: s_mov_b32 s5, s9 18150; GFX7-NEXT: s_mov_b32 s9, s10 18151; GFX7-NEXT: s_mov_b32 s8, s11 18152; GFX7-NEXT: s_add_u32 s4, s4, s9 18153; GFX7-NEXT: s_addc_u32 s8, s5, s8 18154; GFX7-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 18155; GFX7-NEXT: s_mov_b32 s5, s8 18156; GFX7-NEXT: v_mov_b32_e32 v2, s7 18157; GFX7-NEXT: v_mov_b32_e32 v0, s6 18158; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18159; GFX7-NEXT: v_mov_b32_e32 v3, v0 18160; GFX7-NEXT: v_mov_b32_e32 v0, s4 18161; GFX7-NEXT: v_mov_b32_e32 v1, s5 18162; GFX7-NEXT: s_waitcnt vmcnt(0) 18163; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 18164; GFX7-NEXT: s_waitcnt vmcnt(0) 18165; GFX7-NEXT: buffer_wbinvl1_vol 18166; GFX7-NEXT: s_endpgm 18167; 18168; GFX10-WGP-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: 18169; GFX10-WGP: ; %bb.0: ; %entry 18170; GFX10-WGP-NEXT: s_mov_b64 s[4:5], s[8:9] 18171; GFX10-WGP-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 18172; GFX10-WGP-NEXT: s_load_dword s7, s[4:5], 0x8 18173; GFX10-WGP-NEXT: s_load_dword s6, s[4:5], 0xc 18174; GFX10-WGP-NEXT: s_mov_b64 s[10:11], 16 18175; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 
18176; GFX10-WGP-NEXT: s_mov_b32 s4, s8 18177; GFX10-WGP-NEXT: s_mov_b32 s5, s9 18178; GFX10-WGP-NEXT: s_mov_b32 s9, s10 18179; GFX10-WGP-NEXT: s_mov_b32 s8, s11 18180; GFX10-WGP-NEXT: s_add_u32 s4, s4, s9 18181; GFX10-WGP-NEXT: s_addc_u32 s8, s5, s8 18182; GFX10-WGP-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 18183; GFX10-WGP-NEXT: s_mov_b32 s5, s8 18184; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s7 18185; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 18186; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18187; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 18188; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 18189; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 18190; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 18191; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 18192; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 18193; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 18194; GFX10-WGP-NEXT: buffer_gl1_inv 18195; GFX10-WGP-NEXT: buffer_gl0_inv 18196; GFX10-WGP-NEXT: s_endpgm 18197; 18198; GFX10-CU-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: 18199; GFX10-CU: ; %bb.0: ; %entry 18200; GFX10-CU-NEXT: s_mov_b64 s[4:5], s[8:9] 18201; GFX10-CU-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 18202; GFX10-CU-NEXT: s_load_dword s7, s[4:5], 0x8 18203; GFX10-CU-NEXT: s_load_dword s6, s[4:5], 0xc 18204; GFX10-CU-NEXT: s_mov_b64 s[10:11], 16 18205; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 18206; GFX10-CU-NEXT: s_mov_b32 s4, s8 18207; GFX10-CU-NEXT: s_mov_b32 s5, s9 18208; GFX10-CU-NEXT: s_mov_b32 s9, s10 18209; GFX10-CU-NEXT: s_mov_b32 s8, s11 18210; GFX10-CU-NEXT: s_add_u32 s4, s4, s9 18211; GFX10-CU-NEXT: s_addc_u32 s8, s5, s8 18212; GFX10-CU-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 18213; GFX10-CU-NEXT: s_mov_b32 s5, s8 18214; GFX10-CU-NEXT: v_mov_b32_e32 v2, s7 18215; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 18216; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18217; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 18218; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 18219; 
GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 18220; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 18221; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 18222; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 18223; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 18224; GFX10-CU-NEXT: buffer_gl1_inv 18225; GFX10-CU-NEXT: buffer_gl0_inv 18226; GFX10-CU-NEXT: s_endpgm 18227; 18228; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: 18229; SKIP-CACHE-INV: ; %bb.0: ; %entry 18230; SKIP-CACHE-INV-NEXT: s_mov_b64 s[0:1], s[4:5] 18231; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 18232; SKIP-CACHE-INV-NEXT: s_load_dword s3, s[0:1], 0x2 18233; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x3 18234; SKIP-CACHE-INV-NEXT: s_mov_b64 s[6:7], 16 18235; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 18236; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 18237; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 18238; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s6 18239; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s7 18240; SKIP-CACHE-INV-NEXT: s_add_u32 s0, s0, s5 18241; SKIP-CACHE-INV-NEXT: s_addc_u32 s4, s1, s4 18242; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 18243; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s4 18244; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 18245; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 18246; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18247; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 18248; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 18249; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 18250; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18251; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 18252; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 18253; SKIP-CACHE-INV-NEXT: s_endpgm 18254; 18255; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: 18256; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 18257; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18258; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18259; GFX90A-NOTTGSPLIT-NEXT: 
s_load_dword s6, s[8:9], 0xc 18260; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18261; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18262; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 18263; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18264; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 18265; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 18266; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 18267; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18268; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 18269; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18270; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 18271; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 18272; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 18273; 18274; GFX90A-TGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: 18275; GFX90A-TGSPLIT: ; %bb.0: ; %entry 18276; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18277; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18278; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18279; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18280; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18281; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 18282; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18283; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 18284; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 18285; GFX90A-TGSPLIT-NEXT: buffer_wbl2 18286; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18287; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 18288; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18289; GFX90A-TGSPLIT-NEXT: buffer_invl2 18290; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 18291; GFX90A-TGSPLIT-NEXT: s_endpgm 18292; 18293; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: 18294; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 18295; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18296; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, 
s[4:5], 0x8 18297; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18298; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18299; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18300; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 18301; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18302; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 18303; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 18304; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 18305; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18306; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 18307; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 18308; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 18309; GFX940-NOTTGSPLIT-NEXT: s_endpgm 18310; 18311; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: 18312; GFX940-TGSPLIT: ; %bb.0: ; %entry 18313; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18314; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18315; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18316; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18317; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18318; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 18319; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18320; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 18321; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 18322; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 18323; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18324; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 18325; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18326; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 18327; GFX940-TGSPLIT-NEXT: s_endpgm 18328; 18329; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: 18330; GFX11-WGP: ; %bb.0: ; %entry 18331; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18332; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18333; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18334; GFX11-WGP-NEXT: s_waitcnt 
lgkmcnt(0) 18335; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 18336; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 18337; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18338; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 18339; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 18340; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 18341; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 18342; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 18343; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 18344; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 18345; GFX11-WGP-NEXT: buffer_gl1_inv 18346; GFX11-WGP-NEXT: buffer_gl0_inv 18347; GFX11-WGP-NEXT: s_endpgm 18348; 18349; GFX11-CU-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: 18350; GFX11-CU: ; %bb.0: ; %entry 18351; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18352; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18353; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18354; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 18355; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 18356; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 18357; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18358; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 18359; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 18360; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 18361; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 18362; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 18363; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 18364; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 18365; GFX11-CU-NEXT: buffer_gl1_inv 18366; GFX11-CU-NEXT: buffer_gl0_inv 18367; GFX11-CU-NEXT: s_endpgm 18368; 18369; GFX12-WGP-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: 18370; GFX12-WGP: ; %bb.0: ; %entry 18371; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18372; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18373; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18374; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 18375; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 18376; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 18377; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed 
$vgpr2 def $vgpr2_vgpr3 killed $exec 18378; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 18379; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 18380; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 18381; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 18382; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 18383; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 18384; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 18385; GFX12-WGP-NEXT: s_wait_storecnt 0x0 18386; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 18387; GFX12-WGP-NEXT: s_wait_storecnt 0x0 18388; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 18389; GFX12-WGP-NEXT: s_endpgm 18390; 18391; GFX12-CU-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: 18392; GFX12-CU: ; %bb.0: ; %entry 18393; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18394; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18395; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18396; GFX12-CU-NEXT: s_wait_kmcnt 0x0 18397; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 18398; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 18399; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18400; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 18401; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 18402; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 18403; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 18404; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 18405; GFX12-CU-NEXT: s_wait_samplecnt 0x0 18406; GFX12-CU-NEXT: s_wait_loadcnt 0x0 18407; GFX12-CU-NEXT: s_wait_storecnt 0x0 18408; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS 18409; GFX12-CU-NEXT: s_wait_storecnt 0x0 18410; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 18411; GFX12-CU-NEXT: s_endpgm 18412 ptr %out, i32 %in, i32 %old) { 18413entry: 18414 %gep = getelementptr i32, ptr %out, i32 4 18415 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst 18416 ret void 18417} 18418 18419define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_ret_cmpxchg( 18420; GFX7-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: 
18421; GFX7: ; %bb.0: ; %entry 18422; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 18423; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18424; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 18425; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 18426; GFX7-NEXT: s_mov_b64 s[12:13], 16 18427; GFX7-NEXT: s_waitcnt lgkmcnt(0) 18428; GFX7-NEXT: s_mov_b32 s6, s4 18429; GFX7-NEXT: s_mov_b32 s7, s5 18430; GFX7-NEXT: s_mov_b32 s11, s12 18431; GFX7-NEXT: s_mov_b32 s10, s13 18432; GFX7-NEXT: s_add_u32 s6, s6, s11 18433; GFX7-NEXT: s_addc_u32 s10, s7, s10 18434; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 18435; GFX7-NEXT: s_mov_b32 s7, s10 18436; GFX7-NEXT: v_mov_b32_e32 v2, s9 18437; GFX7-NEXT: v_mov_b32_e32 v0, s8 18438; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18439; GFX7-NEXT: v_mov_b32_e32 v3, v0 18440; GFX7-NEXT: v_mov_b32_e32 v0, s6 18441; GFX7-NEXT: v_mov_b32_e32 v1, s7 18442; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 18443; GFX7-NEXT: v_mov_b32_e32 v0, s4 18444; GFX7-NEXT: v_mov_b32_e32 v1, s5 18445; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 18446; GFX7-NEXT: flat_store_dword v[0:1], v2 18447; GFX7-NEXT: s_endpgm 18448; 18449; GFX10-WGP-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: 18450; GFX10-WGP: ; %bb.0: ; %entry 18451; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 18452; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18453; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 18454; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 18455; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 18456; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 18457; GFX10-WGP-NEXT: s_mov_b32 s6, s4 18458; GFX10-WGP-NEXT: s_mov_b32 s7, s5 18459; GFX10-WGP-NEXT: s_mov_b32 s11, s12 18460; GFX10-WGP-NEXT: s_mov_b32 s10, s13 18461; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 18462; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 18463; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 18464; GFX10-WGP-NEXT: s_mov_b32 s7, s10 18465; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 
18466; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 18467; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18468; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 18469; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 18470; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 18471; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 18472; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 18473; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 18474; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 18475; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 18476; GFX10-WGP-NEXT: s_endpgm 18477; 18478; GFX10-CU-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: 18479; GFX10-CU: ; %bb.0: ; %entry 18480; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 18481; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 18482; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 18483; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 18484; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 18485; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 18486; GFX10-CU-NEXT: s_mov_b32 s6, s4 18487; GFX10-CU-NEXT: s_mov_b32 s7, s5 18488; GFX10-CU-NEXT: s_mov_b32 s11, s12 18489; GFX10-CU-NEXT: s_mov_b32 s10, s13 18490; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 18491; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 18492; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 18493; GFX10-CU-NEXT: s_mov_b32 s7, s10 18494; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 18495; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 18496; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18497; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 18498; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 18499; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 18500; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 18501; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 18502; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 18503; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 18504; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 18505; GFX10-CU-NEXT: s_endpgm 18506; 18507; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: 
18508; SKIP-CACHE-INV: ; %bb.0: ; %entry 18509; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 18510; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 18511; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 18512; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 18513; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 18514; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 18515; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 18516; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 18517; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 18518; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 18519; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 18520; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 18521; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 18522; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 18523; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 18524; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 18525; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18526; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 18527; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 18528; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 18529; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 18530; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 18531; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 18532; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 18533; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 18534; SKIP-CACHE-INV-NEXT: s_endpgm 18535; 18536; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: 18537; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 18538; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18539; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18540; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18541; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18542; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18543; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 18544; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 
18545; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 18546; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 18547; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 18548; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 18549; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 18550; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 18551; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 18552; 18553; GFX90A-TGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: 18554; GFX90A-TGSPLIT: ; %bb.0: ; %entry 18555; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 18556; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 18557; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 18558; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18559; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 18560; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 18561; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18562; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 18563; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 18564; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 18565; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 18566; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18567; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 18568; GFX90A-TGSPLIT-NEXT: s_endpgm 18569; 18570; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: 18571; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 18572; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18573; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18574; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18575; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18576; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18577; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 18578; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 
18579; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 18580; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 18581; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 18582; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 18583; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 18584; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 18585; GFX940-NOTTGSPLIT-NEXT: s_endpgm 18586; 18587; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: 18588; GFX940-TGSPLIT: ; %bb.0: ; %entry 18589; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 18590; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 18591; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 18592; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 18593; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 18594; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 18595; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18596; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 18597; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 18598; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 18599; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 18600; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 18601; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 18602; GFX940-TGSPLIT-NEXT: s_endpgm 18603; 18604; GFX11-WGP-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: 18605; GFX11-WGP: ; %bb.0: ; %entry 18606; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18607; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18608; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18609; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 18610; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 18611; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 18612; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18613; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 18614; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 18615; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 18616; 
GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 18617; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 18618; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 18619; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 18620; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 18621; GFX11-WGP-NEXT: s_endpgm 18622; 18623; GFX11-CU-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: 18624; GFX11-CU: ; %bb.0: ; %entry 18625; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18626; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18627; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18628; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 18629; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 18630; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 18631; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18632; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 18633; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 18634; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 18635; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 18636; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 18637; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 18638; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 18639; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 18640; GFX11-CU-NEXT: s_endpgm 18641; 18642; GFX12-WGP-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: 18643; GFX12-WGP: ; %bb.0: ; %entry 18644; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18645; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 18646; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 18647; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 18648; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 18649; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 18650; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18651; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 18652; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 18653; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 18654; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 18655; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 18656; GFX12-WGP-NEXT: 
v_mov_b32_e32 v1, s1 18657; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 18658; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 18659; GFX12-WGP-NEXT: s_endpgm 18660; 18661; GFX12-CU-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: 18662; GFX12-CU: ; %bb.0: ; %entry 18663; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 18664; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 18665; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 18666; GFX12-CU-NEXT: s_wait_kmcnt 0x0 18667; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 18668; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 18669; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 18670; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 18671; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 18672; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 18673; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 18674; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 18675; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 18676; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 18677; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 18678; GFX12-CU-NEXT: s_endpgm 18679 ptr %out, i32 %in, i32 %old) { 18680entry: 18681 %gep = getelementptr i32, ptr %out, i32 4 18682 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic 18683 %val0 = extractvalue { i32, i1 } %val, 0 18684 store i32 %val0, ptr %out, align 4 18685 ret void 18686} 18687

; Kernel under test: a volatile cmpxchg through a flat pointer using
; syncscope("one-as"), success ordering acquire and failure ordering monotonic.
; The exchange targets the i32 four elements past %out (the expected code shows
; this as a 16-byte offset) and the value that was read is then stored to %out.
; Every configuration below expects a wait on the atomic's result immediately
; after the atomic instruction. All of them except the SKIP-CACHE-INV run also
; expect cache-invalidate instruction(s) right after that wait.
define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX7-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT:    s_mov_b64 s[12:13], 16
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, s4
; GFX7-NEXT:    s_mov_b32 s7, s5
; GFX7-NEXT:    s_mov_b32 s11, s12
; GFX7-NEXT:    s_mov_b32 s10, s13
; GFX7-NEXT:    s_add_u32 s6, s6, s11
; GFX7-NEXT:    s_addc_u32 s10, s7, s10
; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT:    s_mov_b32 s7, s10
; GFX7-NEXT:    v_mov_b32_e32 v2, s9
; GFX7-NEXT:    v_mov_b32_e32 v0, s8
; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT:    v_mov_b32_e32 v3, v0
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_mov_b64 s[6:7], s[8:9]
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX10-WGP-NEXT:    s_load_dword s9, s[6:7], 0x8
; GFX10-WGP-NEXT:    s_load_dword s8, s[6:7], 0xc
; GFX10-WGP-NEXT:    s_mov_b64 s[12:13], 16
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    s_mov_b32 s6, s4
; GFX10-WGP-NEXT:    s_mov_b32 s7, s5
; GFX10-WGP-NEXT:    s_mov_b32 s11, s12
; GFX10-WGP-NEXT:    s_mov_b32 s10, s13
; GFX10-WGP-NEXT:    s_add_u32 s6, s6, s11
; GFX10-WGP-NEXT:    s_addc_u32 s10, s7, s10
; GFX10-WGP-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX10-WGP-NEXT:    s_mov_b32 s7, s10
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s9
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s8
; GFX10-WGP-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, v0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    flat_store_dword v[0:1], v2
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_mov_b64 s[6:7], s[8:9]
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX10-CU-NEXT:    s_load_dword s9, s[6:7], 0x8
; GFX10-CU-NEXT:    s_load_dword s8, s[6:7], 0xc
; GFX10-CU-NEXT:    s_mov_b64 s[12:13], 16
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    s_mov_b32 s6, s4
; GFX10-CU-NEXT:    s_mov_b32 s7, s5
; GFX10-CU-NEXT:    s_mov_b32 s11, s12
; GFX10-CU-NEXT:    s_mov_b32 s10, s13
; GFX10-CU-NEXT:    s_add_u32 s6, s6, s11
; GFX10-CU-NEXT:    s_addc_u32 s10, s7, s10
; GFX10-CU-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX10-CU-NEXT:    s_mov_b32 s7, s10
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s9
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s8
; GFX10-CU-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX10-CU-NEXT:    v_mov_b32_e32 v3, v0
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    flat_store_dword v[0:1], v2
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[8:9], 16
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s0
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s1
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, s7
; SKIP-CACHE-INV-NEXT:    s_addc_u32 s6, s3, s6
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s5
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v3, v0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
; SKIP-CACHE-INV-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    flat_store_dword v[0:1], v2
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v0
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    flat_store_dword v[0:1], v2
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v0
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    buffer_invl2
; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    flat_store_dword v[0:1], v2
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s2
; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v0
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
; GFX940-TGSPLIT:       ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s2
; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v0
; GFX940-TGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT:    flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, s3
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, v0
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT:    flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT:    buffer_gl1_inv
; GFX11-WGP-NEXT:    buffer_gl0_inv
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    flat_store_b32 v[0:1], v2
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v2, s3
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX11-CU-NEXT:    v_mov_b32_e32 v3, v0
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT:    flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX11-CU-NEXT:    buffer_gl1_inv
; GFX11-CU-NEXT:    buffer_gl0_inv
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    flat_store_b32 v[0:1], v2
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, s3
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, v0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT:    flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT:    s_wait_dscnt 0x0
; GFX12-WGP-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v2, s3
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX12-CU-NEXT:    v_mov_b32_e32 v3, v0
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT:    flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT:    s_wait_dscnt 0x0
; GFX12-CU-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT:    s_endpgm
    ptr %out, i32 %in, i32 %old) {
entry:
  ; Address of the i32 four elements past %out.
  %slot = getelementptr i32, ptr %out, i32 4
  ; Compare against %old, write %in on a match.
  %pair = cmpxchg volatile ptr %slot, i32 %old, i32 %in syncscope("one-as") acquire monotonic
  ; Field 0 of the cmpxchg result is the value that was loaded.
  %observed = extractvalue { i32, i1 } %pair, 0
  store i32 %observed, ptr %out, align 4
  ret void
}

; Kernel under test: a volatile cmpxchg through a flat pointer using
; syncscope("one-as"), success ordering release and failure ordering monotonic.
; The exchange targets the i32 four elements past %out (the expected code shows
; this as a 16-byte offset) and the value that was read is then stored to %out.
; Every configuration below expects waits on outstanding memory operations
; immediately before the atomic instruction. The gfx90a / gfx940 runs also
; expect a buffer_wbl2, and the gfx1200 runs a global_wb, ahead of those waits.
; No cache invalidate is expected after the atomic.
define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg(
; GFX7-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT:    s_mov_b64 s[12:13], 16
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, s4
; GFX7-NEXT:    s_mov_b32 s7, s5
; GFX7-NEXT:    s_mov_b32 s11, s12
; GFX7-NEXT:    s_mov_b32 s10, s13
; GFX7-NEXT:    s_add_u32 s6, s6, s11
; GFX7-NEXT:    s_addc_u32 s10, s7, s10
; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT:    s_mov_b32 s7, s10
; GFX7-NEXT:    v_mov_b32_e32 v2, s9
; GFX7-NEXT:    v_mov_b32_e32 v0, s8
; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT:    v_mov_b32_e32 v3, v0
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_mov_b64 s[6:7], s[8:9]
; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX10-WGP-NEXT:    s_load_dword s9, s[6:7], 0x8
; GFX10-WGP-NEXT:    s_load_dword s8, s[6:7], 0xc
; GFX10-WGP-NEXT:    s_mov_b64 s[12:13], 16
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    s_mov_b32 s6, s4
; GFX10-WGP-NEXT:    s_mov_b32 s7, s5
; GFX10-WGP-NEXT:    s_mov_b32 s11, s12
; GFX10-WGP-NEXT:    s_mov_b32 s10, s13
; GFX10-WGP-NEXT:    s_add_u32 s6, s6, s11
; GFX10-WGP-NEXT:    s_addc_u32 s10, s7, s10
; GFX10-WGP-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX10-WGP-NEXT:    s_mov_b32 s7, s10
; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s9
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s8
; GFX10-WGP-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, v0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    flat_store_dword v[0:1], v2
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_mov_b64 s[6:7], s[8:9]
; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX10-CU-NEXT:    s_load_dword s9, s[6:7], 0x8
; GFX10-CU-NEXT:    s_load_dword s8, s[6:7], 0xc
; GFX10-CU-NEXT:    s_mov_b64 s[12:13], 16
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    s_mov_b32 s6, s4
; GFX10-CU-NEXT:    s_mov_b32 s7, s5
; GFX10-CU-NEXT:    s_mov_b32 s11, s12
; GFX10-CU-NEXT:    s_mov_b32 s10, s13
; GFX10-CU-NEXT:    s_add_u32 s6, s6, s11
; GFX10-CU-NEXT:    s_addc_u32 s10, s7, s10
; GFX10-CU-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX10-CU-NEXT:    s_mov_b32 s7, s10
; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s9
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s8
; GFX10-CU-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX10-CU-NEXT:    v_mov_b32_e32 v3, v0
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    flat_store_dword v[0:1], v2
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[8:9], 16
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s0
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s1
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, s7
; SKIP-CACHE-INV-NEXT:    s_addc_u32 s6, s3, s6
; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s5
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v3, v0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    flat_store_dword v[0:1], v2
; SKIP-CACHE-INV-NEXT:    s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v0
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT:    flat_store_dword v[0:1], v2
; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX90A-TGSPLIT-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v0
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
; GFX90A-TGSPLIT-NEXT:    v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT:    flat_store_dword v[0:1], v2
; GFX90A-TGSPLIT-NEXT:    s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, s2
; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v0
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT:    flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
;
; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
; GFX940-TGSPLIT:       ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, s2
; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v0
; GFX940-TGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1
; GFX940-TGSPLIT-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT:    flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-TGSPLIT-NEXT:    s_endpgm
;
; GFX11-WGP-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
; GFX11-WGP:       ; %bb.0: ; %entry
; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, s3
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, v0
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT:    flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc
; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-WGP-NEXT:    flat_store_b32 v[0:1], v2
; GFX11-WGP-NEXT:    s_endpgm
;
; GFX11-CU-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
; GFX11-CU:       ; %bb.0: ; %entry
; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT:    v_mov_b32_e32 v2, s3
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX11-CU-NEXT:    v_mov_b32_e32 v3, v0
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT:    flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc
; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-CU-NEXT:    flat_store_b32 v[0:1], v2
; GFX11-CU-NEXT:    s_endpgm
;
; GFX12-WGP-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
; GFX12-WGP:       ; %bb.0: ; %entry
; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, s3
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, v0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
; GFX12-WGP-NEXT:    flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-WGP-NEXT:    s_endpgm
;
; GFX12-CU-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
; GFX12-CU:       ; %bb.0: ; %entry
; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
; GFX12-CU-NEXT:    v_mov_b32_e32 v2, s3
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX12-CU-NEXT:    v_mov_b32_e32 v3, v0
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
; GFX12-CU-NEXT:    s_wait_storecnt 0x0
; GFX12-CU-NEXT:    flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT:    flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT:    s_endpgm
    ptr %out, i32 %in, i32 %old) {
entry:
  ; Address of the i32 four elements past %out.
  %slot = getelementptr i32, ptr %out, i32 4
  ; Compare against %old, write %in on a match.
  %pair = cmpxchg volatile ptr %slot, i32 %old, i32 %in syncscope("one-as") release monotonic
  ; Field 0 of the cmpxchg result is the value that was loaded.
  %observed = extractvalue { i32, i1 } %pair, 0
  store i32 %observed, ptr %out, align 4
  ret void
}

19281define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( 19282; GFX7-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: 19283; GFX7: ; %bb.0: ; %entry 19284; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 19285; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19286; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 19287; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 19288; GFX7-NEXT: s_mov_b64 s[12:13], 16 19289; GFX7-NEXT: s_waitcnt lgkmcnt(0) 19290; GFX7-NEXT: s_mov_b32 s6, s4 19291; GFX7-NEXT: s_mov_b32 s7, s5 19292; GFX7-NEXT: s_mov_b32 s11, s12 19293; GFX7-NEXT: s_mov_b32 s10, s13 19294; GFX7-NEXT: s_add_u32 s6, s6, s11 19295; GFX7-NEXT: s_addc_u32 s10, s7, s10 19296; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 19297; GFX7-NEXT: s_mov_b32 s7, s10 19298; GFX7-NEXT: v_mov_b32_e32 v2, s9 19299; GFX7-NEXT: v_mov_b32_e32 v0, s8 19300; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19301; GFX7-NEXT: v_mov_b32_e32 v3, v0 19302; GFX7-NEXT: v_mov_b32_e32 v0, s6 19303; GFX7-NEXT: v_mov_b32_e32 v1, s7 19304; GFX7-NEXT: s_waitcnt vmcnt(0) 19305; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 19306; GFX7-NEXT: s_waitcnt
vmcnt(0) 19307; GFX7-NEXT: buffer_wbinvl1_vol 19308; GFX7-NEXT: v_mov_b32_e32 v0, s4 19309; GFX7-NEXT: v_mov_b32_e32 v1, s5 19310; GFX7-NEXT: s_waitcnt lgkmcnt(0) 19311; GFX7-NEXT: flat_store_dword v[0:1], v2 19312; GFX7-NEXT: s_endpgm 19313; 19314; GFX10-WGP-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: 19315; GFX10-WGP: ; %bb.0: ; %entry 19316; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 19317; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19318; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 19319; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 19320; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 19321; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 19322; GFX10-WGP-NEXT: s_mov_b32 s6, s4 19323; GFX10-WGP-NEXT: s_mov_b32 s7, s5 19324; GFX10-WGP-NEXT: s_mov_b32 s11, s12 19325; GFX10-WGP-NEXT: s_mov_b32 s10, s13 19326; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 19327; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 19328; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 19329; GFX10-WGP-NEXT: s_mov_b32 s7, s10 19330; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 19331; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 19332; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19333; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 19334; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 19335; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 19336; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 19337; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 19338; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 19339; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 19340; GFX10-WGP-NEXT: buffer_gl1_inv 19341; GFX10-WGP-NEXT: buffer_gl0_inv 19342; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 19343; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 19344; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 19345; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 19346; GFX10-WGP-NEXT: s_endpgm 19347; 19348; GFX10-CU-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: 19349; GFX10-CU: ; %bb.0: ; %entry 19350; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 19351; 
GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19352; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 19353; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 19354; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 19355; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 19356; GFX10-CU-NEXT: s_mov_b32 s6, s4 19357; GFX10-CU-NEXT: s_mov_b32 s7, s5 19358; GFX10-CU-NEXT: s_mov_b32 s11, s12 19359; GFX10-CU-NEXT: s_mov_b32 s10, s13 19360; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 19361; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 19362; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 19363; GFX10-CU-NEXT: s_mov_b32 s7, s10 19364; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 19365; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 19366; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19367; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 19368; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 19369; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 19370; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 19371; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 19372; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 19373; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 19374; GFX10-CU-NEXT: buffer_gl1_inv 19375; GFX10-CU-NEXT: buffer_gl0_inv 19376; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 19377; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 19378; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 19379; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 19380; GFX10-CU-NEXT: s_endpgm 19381; 19382; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: 19383; SKIP-CACHE-INV: ; %bb.0: ; %entry 19384; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 19385; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 19386; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 19387; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 19388; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 19389; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 19390; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 19391; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 19392; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 19393; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s6, s9 19394; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 19395; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 19396; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 19397; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 19398; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 19399; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 19400; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19401; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 19402; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 19403; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 19404; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19405; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 19406; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19407; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 19408; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 19409; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 19410; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 19411; SKIP-CACHE-INV-NEXT: s_endpgm 19412; 19413; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: 19414; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 19415; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19416; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19417; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19418; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19419; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19420; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 19421; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19422; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 19423; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 19424; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 19425; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19426; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 19427; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19428; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 19429; GFX90A-NOTTGSPLIT-NEXT: 
buffer_wbinvl1_vol 19430; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 19431; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19432; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 19433; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 19434; 19435; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: 19436; GFX90A-TGSPLIT: ; %bb.0: ; %entry 19437; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19438; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19439; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19440; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19441; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19442; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 19443; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19444; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 19445; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 19446; GFX90A-TGSPLIT-NEXT: buffer_wbl2 19447; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19448; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 19449; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19450; GFX90A-TGSPLIT-NEXT: buffer_invl2 19451; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 19452; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 19453; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 19454; GFX90A-TGSPLIT-NEXT: s_endpgm 19455; 19456; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: 19457; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 19458; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19459; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19460; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19461; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19462; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19463; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 19464; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19465; GFX940-NOTTGSPLIT-NEXT: 
v_mov_b32_e32 v3, v0 19466; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 19467; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 19468; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19469; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 19470; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19471; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 19472; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 19473; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19474; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 19475; GFX940-NOTTGSPLIT-NEXT: s_endpgm 19476; 19477; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: 19478; GFX940-TGSPLIT: ; %bb.0: ; %entry 19479; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19480; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19481; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19482; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19483; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19484; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 19485; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19486; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 19487; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 19488; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 19489; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19490; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 19491; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19492; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 19493; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 19494; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 19495; GFX940-TGSPLIT-NEXT: s_endpgm 19496; 19497; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: 19498; GFX11-WGP: ; %bb.0: ; %entry 19499; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19500; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 19501; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 19502; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 19503; 
GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 19504; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 19505; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19506; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 19507; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 19508; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 19509; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19510; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 19511; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 19512; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19513; GFX11-WGP-NEXT: buffer_gl1_inv 19514; GFX11-WGP-NEXT: buffer_gl0_inv 19515; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 19516; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 19517; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 19518; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 19519; GFX11-WGP-NEXT: s_endpgm 19520; 19521; GFX11-CU-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: 19522; GFX11-CU: ; %bb.0: ; %entry 19523; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19524; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19525; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19526; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 19527; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 19528; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 19529; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19530; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 19531; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 19532; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 19533; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19534; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 19535; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 19536; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19537; GFX11-CU-NEXT: buffer_gl1_inv 19538; GFX11-CU-NEXT: buffer_gl0_inv 19539; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 19540; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 19541; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 19542; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 19543; GFX11-CU-NEXT: s_endpgm 19544; 19545; GFX12-WGP-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: 19546; GFX12-WGP: 
; %bb.0: ; %entry 19547; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19548; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 19549; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 19550; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 19551; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 19552; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 19553; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19554; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 19555; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 19556; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 19557; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 19558; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19559; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19560; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19561; GFX12-WGP-NEXT: s_wait_storecnt 0x0 19562; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 19563; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19564; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19565; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19566; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 19567; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 19568; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 19569; GFX12-WGP-NEXT: s_wait_dscnt 0x0 19570; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 19571; GFX12-WGP-NEXT: s_endpgm 19572; 19573; GFX12-CU-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: 19574; GFX12-CU: ; %bb.0: ; %entry 19575; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19576; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19577; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19578; GFX12-CU-NEXT: s_wait_kmcnt 0x0 19579; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 19580; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 19581; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19582; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 19583; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 19584; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 19585; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 19586; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 19587; GFX12-CU-NEXT: s_wait_samplecnt 0x0 19588; GFX12-CU-NEXT: 
s_wait_loadcnt 0x0 19589; GFX12-CU-NEXT: s_wait_storecnt 0x0 19590; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 19591; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 19592; GFX12-CU-NEXT: s_wait_samplecnt 0x0 19593; GFX12-CU-NEXT: s_wait_loadcnt 0x0 19594; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 19595; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 19596; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 19597; GFX12-CU-NEXT: s_wait_dscnt 0x0 19598; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 19599; GFX12-CU-NEXT: s_endpgm 19600 ptr %out, i32 %in, i32 %old) { 19601entry: 19602 %gep = getelementptr i32, ptr %out, i32 4 19603 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic 19604 %val0 = extractvalue { i32, i1 } %val, 0 19605 store i32 %val0, ptr %out, align 4 19606 ret void 19607} 19608; Returning flat cmpxchg with syncscope("one-as"), seq_cst ordering on success and monotonic ordering on failure; the value read from memory is stored back through %out. The check lines below are autogenerated, regenerate them with the update script instead of editing by hand. 19609define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( 19610; GFX7-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: 19611; GFX7: ; %bb.0: ; %entry 19612; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 19613; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19614; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 19615; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 19616; GFX7-NEXT: s_mov_b64 s[12:13], 16 19617; GFX7-NEXT: s_waitcnt lgkmcnt(0) 19618; GFX7-NEXT: s_mov_b32 s6, s4 19619; GFX7-NEXT: s_mov_b32 s7, s5 19620; GFX7-NEXT: s_mov_b32 s11, s12 19621; GFX7-NEXT: s_mov_b32 s10, s13 19622; GFX7-NEXT: s_add_u32 s6, s6, s11 19623; GFX7-NEXT: s_addc_u32 s10, s7, s10 19624; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 19625; GFX7-NEXT: s_mov_b32 s7, s10 19626; GFX7-NEXT: v_mov_b32_e32 v2, s9 19627; GFX7-NEXT: v_mov_b32_e32 v0, s8 19628; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19629; GFX7-NEXT: v_mov_b32_e32 v3, v0 19630; GFX7-NEXT: v_mov_b32_e32 v0, s6 19631; GFX7-NEXT: v_mov_b32_e32 v1, s7 19632; GFX7-NEXT: s_waitcnt vmcnt(0) 19633; GFX7-NEXT: flat_atomic_cmpswap v2, 
v[0:1], v[2:3] glc 19634; GFX7-NEXT: s_waitcnt vmcnt(0) 19635; GFX7-NEXT: buffer_wbinvl1_vol 19636; GFX7-NEXT: v_mov_b32_e32 v0, s4 19637; GFX7-NEXT: v_mov_b32_e32 v1, s5 19638; GFX7-NEXT: s_waitcnt lgkmcnt(0) 19639; GFX7-NEXT: flat_store_dword v[0:1], v2 19640; GFX7-NEXT: s_endpgm 19641; 19642; GFX10-WGP-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: 19643; GFX10-WGP: ; %bb.0: ; %entry 19644; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 19645; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19646; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 19647; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 19648; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 19649; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 19650; GFX10-WGP-NEXT: s_mov_b32 s6, s4 19651; GFX10-WGP-NEXT: s_mov_b32 s7, s5 19652; GFX10-WGP-NEXT: s_mov_b32 s11, s12 19653; GFX10-WGP-NEXT: s_mov_b32 s10, s13 19654; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 19655; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 19656; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 19657; GFX10-WGP-NEXT: s_mov_b32 s7, s10 19658; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 19659; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 19660; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19661; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 19662; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 19663; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 19664; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 19665; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 19666; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 19667; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 19668; GFX10-WGP-NEXT: buffer_gl1_inv 19669; GFX10-WGP-NEXT: buffer_gl0_inv 19670; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 19671; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 19672; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 19673; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 19674; GFX10-WGP-NEXT: s_endpgm 19675; 19676; GFX10-CU-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: 19677; GFX10-CU: ; %bb.0: ; %entry 19678; 
GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 19679; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19680; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 19681; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 19682; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 19683; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 19684; GFX10-CU-NEXT: s_mov_b32 s6, s4 19685; GFX10-CU-NEXT: s_mov_b32 s7, s5 19686; GFX10-CU-NEXT: s_mov_b32 s11, s12 19687; GFX10-CU-NEXT: s_mov_b32 s10, s13 19688; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 19689; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 19690; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 19691; GFX10-CU-NEXT: s_mov_b32 s7, s10 19692; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 19693; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 19694; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19695; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 19696; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 19697; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 19698; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 19699; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 19700; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 19701; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 19702; GFX10-CU-NEXT: buffer_gl1_inv 19703; GFX10-CU-NEXT: buffer_gl0_inv 19704; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 19705; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 19706; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 19707; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 19708; GFX10-CU-NEXT: s_endpgm 19709; 19710; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: 19711; SKIP-CACHE-INV: ; %bb.0: ; %entry 19712; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 19713; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 19714; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 19715; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 19716; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 19717; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 19718; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 19719; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 19720; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 19721; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 19722; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 19723; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 19724; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 19725; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 19726; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 19727; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 19728; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19729; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 19730; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 19731; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 19732; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19733; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 19734; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 19735; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 19736; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 19737; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 19738; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 19739; SKIP-CACHE-INV-NEXT: s_endpgm 19740; 19741; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: 19742; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 19743; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19744; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19745; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19746; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19747; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19748; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 19749; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19750; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 19751; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 19752; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 19753; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19754; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 19755; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19756; 
GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 19757; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 19758; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 19759; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19760; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 19761; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 19762; 19763; GFX90A-TGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: 19764; GFX90A-TGSPLIT: ; %bb.0: ; %entry 19765; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 19766; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 19767; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 19768; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19769; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 19770; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 19771; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19772; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 19773; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 19774; GFX90A-TGSPLIT-NEXT: buffer_wbl2 19775; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19776; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 19777; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19778; GFX90A-TGSPLIT-NEXT: buffer_invl2 19779; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 19780; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 19781; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 19782; GFX90A-TGSPLIT-NEXT: s_endpgm 19783; 19784; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: 19785; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 19786; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19787; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19788; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19789; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19790; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19791; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 19792; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 
def $vgpr2_vgpr3 killed $exec 19793; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 19794; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 19795; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 19796; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19797; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 19798; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 19799; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 19800; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 19801; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19802; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 19803; GFX940-NOTTGSPLIT-NEXT: s_endpgm 19804; 19805; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: 19806; GFX940-TGSPLIT: ; %bb.0: ; %entry 19807; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 19808; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 19809; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 19810; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 19811; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 19812; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 19813; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19814; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 19815; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 19816; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 19817; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19818; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 19819; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 19820; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 19821; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 19822; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 19823; GFX940-TGSPLIT-NEXT: s_endpgm 19824; 19825; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: 19826; GFX11-WGP: ; %bb.0: ; %entry 19827; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19828; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 19829; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 
0xc 19830; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 19831; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 19832; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 19833; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19834; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 19835; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 19836; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 19837; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19838; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 19839; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 19840; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 19841; GFX11-WGP-NEXT: buffer_gl1_inv 19842; GFX11-WGP-NEXT: buffer_gl0_inv 19843; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 19844; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 19845; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 19846; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 19847; GFX11-WGP-NEXT: s_endpgm 19848; 19849; GFX11-CU-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: 19850; GFX11-CU: ; %bb.0: ; %entry 19851; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19852; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19853; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19854; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 19855; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 19856; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 19857; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19858; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 19859; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 19860; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 19861; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19862; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 19863; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 19864; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 19865; GFX11-CU-NEXT: buffer_gl1_inv 19866; GFX11-CU-NEXT: buffer_gl0_inv 19867; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 19868; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 19869; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 19870; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 19871; GFX11-CU-NEXT: s_endpgm 19872; 19873; GFX12-WGP-LABEL: 
flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: 19874; GFX12-WGP: ; %bb.0: ; %entry 19875; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19876; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 19877; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 19878; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 19879; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 19880; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 19881; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19882; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 19883; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 19884; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 19885; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 19886; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19887; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19888; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19889; GFX12-WGP-NEXT: s_wait_storecnt 0x0 19890; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 19891; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 19892; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 19893; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 19894; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 19895; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 19896; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 19897; GFX12-WGP-NEXT: s_wait_dscnt 0x0 19898; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 19899; GFX12-WGP-NEXT: s_endpgm 19900; 19901; GFX12-CU-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: 19902; GFX12-CU: ; %bb.0: ; %entry 19903; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 19904; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 19905; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 19906; GFX12-CU-NEXT: s_wait_kmcnt 0x0 19907; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 19908; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 19909; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19910; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 19911; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 19912; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 19913; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 19914; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 
19915; GFX12-CU-NEXT: s_wait_samplecnt 0x0 19916; GFX12-CU-NEXT: s_wait_loadcnt 0x0 19917; GFX12-CU-NEXT: s_wait_storecnt 0x0 19918; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 19919; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 19920; GFX12-CU-NEXT: s_wait_samplecnt 0x0 19921; GFX12-CU-NEXT: s_wait_loadcnt 0x0 19922; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 19923; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 19924; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 19925; GFX12-CU-NEXT: s_wait_dscnt 0x0 19926; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 19927; GFX12-CU-NEXT: s_endpgm 19928 ptr %out, i32 %in, i32 %old) { 19929entry: 19930 %gep = getelementptr i32, ptr %out, i32 4 ; address of the i32 four elements past %out (byte offset 16) 19931 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic 19932 %val0 = extractvalue { i32, i1 } %val, 0 ; field 0 of the cmpxchg result pair, the value that was read from memory 19933 store i32 %val0, ptr %out, align 4 19934 ret void 19935} 19936 19937define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg( 19938; GFX7-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: 19939; GFX7: ; %bb.0: ; %entry 19940; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 19941; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19942; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 19943; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 19944; GFX7-NEXT: s_mov_b64 s[12:13], 16 19945; GFX7-NEXT: s_waitcnt lgkmcnt(0) 19946; GFX7-NEXT: s_mov_b32 s6, s4 19947; GFX7-NEXT: s_mov_b32 s7, s5 19948; GFX7-NEXT: s_mov_b32 s11, s12 19949; GFX7-NEXT: s_mov_b32 s10, s13 19950; GFX7-NEXT: s_add_u32 s6, s6, s11 19951; GFX7-NEXT: s_addc_u32 s10, s7, s10 19952; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 19953; GFX7-NEXT: s_mov_b32 s7, s10 19954; GFX7-NEXT: v_mov_b32_e32 v2, s9 19955; GFX7-NEXT: v_mov_b32_e32 v0, s8 19956; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19957; GFX7-NEXT: v_mov_b32_e32 v3, v0 19958; GFX7-NEXT: v_mov_b32_e32 v0, s6 19959; GFX7-NEXT: v_mov_b32_e32 v1, s7 19960; GFX7-NEXT: 
flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 19961; GFX7-NEXT: s_waitcnt vmcnt(0) 19962; GFX7-NEXT: buffer_wbinvl1_vol 19963; GFX7-NEXT: v_mov_b32_e32 v0, s4 19964; GFX7-NEXT: v_mov_b32_e32 v1, s5 19965; GFX7-NEXT: s_waitcnt lgkmcnt(0) 19966; GFX7-NEXT: flat_store_dword v[0:1], v2 19967; GFX7-NEXT: s_endpgm 19968; 19969; GFX10-WGP-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: 19970; GFX10-WGP: ; %bb.0: ; %entry 19971; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 19972; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 19973; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 19974; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 19975; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 19976; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 19977; GFX10-WGP-NEXT: s_mov_b32 s6, s4 19978; GFX10-WGP-NEXT: s_mov_b32 s7, s5 19979; GFX10-WGP-NEXT: s_mov_b32 s11, s12 19980; GFX10-WGP-NEXT: s_mov_b32 s10, s13 19981; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 19982; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 19983; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 19984; GFX10-WGP-NEXT: s_mov_b32 s7, s10 19985; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 19986; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 19987; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 19988; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 19989; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 19990; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 19991; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 19992; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 19993; GFX10-WGP-NEXT: buffer_gl1_inv 19994; GFX10-WGP-NEXT: buffer_gl0_inv 19995; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 19996; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 19997; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 19998; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 19999; GFX10-WGP-NEXT: s_endpgm 20000; 20001; GFX10-CU-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: 20002; GFX10-CU: ; %bb.0: ; %entry 20003; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 20004; GFX10-CU-NEXT: s_load_dwordx2 
s[4:5], s[6:7], 0x0 20005; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 20006; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 20007; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 20008; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20009; GFX10-CU-NEXT: s_mov_b32 s6, s4 20010; GFX10-CU-NEXT: s_mov_b32 s7, s5 20011; GFX10-CU-NEXT: s_mov_b32 s11, s12 20012; GFX10-CU-NEXT: s_mov_b32 s10, s13 20013; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 20014; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 20015; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20016; GFX10-CU-NEXT: s_mov_b32 s7, s10 20017; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 20018; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 20019; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20020; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 20021; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 20022; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 20023; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20024; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20025; GFX10-CU-NEXT: buffer_gl1_inv 20026; GFX10-CU-NEXT: buffer_gl0_inv 20027; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 20028; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 20029; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20030; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 20031; GFX10-CU-NEXT: s_endpgm 20032; 20033; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: 20034; SKIP-CACHE-INV: ; %bb.0: ; %entry 20035; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 20036; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 20037; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 20038; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 20039; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 20040; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20041; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 20042; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 20043; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 20044; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 20045; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 20046; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 
20047; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 20048; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 20049; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 20050; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 20051; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20052; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 20053; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 20054; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 20055; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20056; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20057; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 20058; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 20059; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20060; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 20061; SKIP-CACHE-INV-NEXT: s_endpgm 20062; 20063; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: 20064; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 20065; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20066; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20067; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20068; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20069; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20070; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 20071; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20072; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 20073; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 20074; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 20075; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20076; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 20077; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 20078; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 20079; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20080; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 20081; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 20082; 20083; 
GFX90A-TGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: 20084; GFX90A-TGSPLIT: ; %bb.0: ; %entry 20085; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20086; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20087; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20088; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20089; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20090; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 20091; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20092; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 20093; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 20094; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 20095; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20096; GFX90A-TGSPLIT-NEXT: buffer_invl2 20097; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 20098; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 20099; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 20100; GFX90A-TGSPLIT-NEXT: s_endpgm 20101; 20102; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: 20103; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 20104; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20105; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20106; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20107; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20108; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20109; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 20110; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20111; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 20112; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 20113; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 20114; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20115; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 20116; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 20117; GFX940-NOTTGSPLIT-NEXT: 
s_waitcnt lgkmcnt(0) 20118; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 20119; GFX940-NOTTGSPLIT-NEXT: s_endpgm 20120; 20121; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: 20122; GFX940-TGSPLIT: ; %bb.0: ; %entry 20123; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20124; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20125; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20126; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20127; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20128; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 20129; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20130; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 20131; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 20132; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 20133; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20134; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 20135; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 20136; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 20137; GFX940-TGSPLIT-NEXT: s_endpgm 20138; 20139; GFX11-WGP-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: 20140; GFX11-WGP: ; %bb.0: ; %entry 20141; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20142; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20143; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20144; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 20145; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 20146; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 20147; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20148; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 20149; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 20150; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 20151; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 20152; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20153; GFX11-WGP-NEXT: buffer_gl1_inv 20154; GFX11-WGP-NEXT: buffer_gl0_inv 20155; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 20156; GFX11-WGP-NEXT: 
v_mov_b32_e32 v1, s1 20157; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 20158; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 20159; GFX11-WGP-NEXT: s_endpgm 20160; 20161; GFX11-CU-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: 20162; GFX11-CU: ; %bb.0: ; %entry 20163; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20164; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20165; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20166; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 20167; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 20168; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 20169; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20170; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 20171; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 20172; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 20173; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 20174; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20175; GFX11-CU-NEXT: buffer_gl1_inv 20176; GFX11-CU-NEXT: buffer_gl0_inv 20177; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 20178; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 20179; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 20180; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 20181; GFX11-CU-NEXT: s_endpgm 20182; 20183; GFX12-WGP-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: 20184; GFX12-WGP: ; %bb.0: ; %entry 20185; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20186; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20187; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20188; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 20189; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 20190; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 20191; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20192; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 20193; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 20194; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 20195; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 20196; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20197; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20198; GFX12-WGP-NEXT: 
s_wait_loadcnt 0x0 20199; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 20200; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 20201; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 20202; GFX12-WGP-NEXT: s_wait_dscnt 0x0 20203; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 20204; GFX12-WGP-NEXT: s_endpgm 20205; 20206; GFX12-CU-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: 20207; GFX12-CU: ; %bb.0: ; %entry 20208; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20209; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20210; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20211; GFX12-CU-NEXT: s_wait_kmcnt 0x0 20212; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 20213; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 20214; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20215; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 20216; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 20217; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 20218; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 20219; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 20220; GFX12-CU-NEXT: s_wait_samplecnt 0x0 20221; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20222; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 20223; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 20224; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 20225; GFX12-CU-NEXT: s_wait_dscnt 0x0 20226; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 20227; GFX12-CU-NEXT: s_endpgm 20228 ptr %out, i32 %in, i32 %old) { 20229entry: 20230 %gep = getelementptr i32, ptr %out, i32 4 20231 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire 20232 %val0 = extractvalue { i32, i1 } %val, 0 20233 store i32 %val0, ptr %out, align 4 20234 ret void 20235} 20236 20237define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg( 20238; GFX7-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: 20239; GFX7: ; %bb.0: ; %entry 20240; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 20241; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20242; GFX7-NEXT: s_load_dword s9, s[6:7], 
0x2 20243; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 20244; GFX7-NEXT: s_mov_b64 s[12:13], 16 20245; GFX7-NEXT: s_waitcnt lgkmcnt(0) 20246; GFX7-NEXT: s_mov_b32 s6, s4 20247; GFX7-NEXT: s_mov_b32 s7, s5 20248; GFX7-NEXT: s_mov_b32 s11, s12 20249; GFX7-NEXT: s_mov_b32 s10, s13 20250; GFX7-NEXT: s_add_u32 s6, s6, s11 20251; GFX7-NEXT: s_addc_u32 s10, s7, s10 20252; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20253; GFX7-NEXT: s_mov_b32 s7, s10 20254; GFX7-NEXT: v_mov_b32_e32 v2, s9 20255; GFX7-NEXT: v_mov_b32_e32 v0, s8 20256; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20257; GFX7-NEXT: v_mov_b32_e32 v3, v0 20258; GFX7-NEXT: v_mov_b32_e32 v0, s6 20259; GFX7-NEXT: v_mov_b32_e32 v1, s7 20260; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20261; GFX7-NEXT: s_waitcnt vmcnt(0) 20262; GFX7-NEXT: buffer_wbinvl1_vol 20263; GFX7-NEXT: v_mov_b32_e32 v0, s4 20264; GFX7-NEXT: v_mov_b32_e32 v1, s5 20265; GFX7-NEXT: s_waitcnt lgkmcnt(0) 20266; GFX7-NEXT: flat_store_dword v[0:1], v2 20267; GFX7-NEXT: s_endpgm 20268; 20269; GFX10-WGP-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: 20270; GFX10-WGP: ; %bb.0: ; %entry 20271; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 20272; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20273; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 20274; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 20275; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 20276; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 20277; GFX10-WGP-NEXT: s_mov_b32 s6, s4 20278; GFX10-WGP-NEXT: s_mov_b32 s7, s5 20279; GFX10-WGP-NEXT: s_mov_b32 s11, s12 20280; GFX10-WGP-NEXT: s_mov_b32 s10, s13 20281; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 20282; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 20283; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20284; GFX10-WGP-NEXT: s_mov_b32 s7, s10 20285; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 20286; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 20287; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def 
$vgpr2_vgpr3 killed $exec 20288; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 20289; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 20290; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 20291; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20292; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20293; GFX10-WGP-NEXT: buffer_gl1_inv 20294; GFX10-WGP-NEXT: buffer_gl0_inv 20295; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 20296; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 20297; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 20298; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 20299; GFX10-WGP-NEXT: s_endpgm 20300; 20301; GFX10-CU-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: 20302; GFX10-CU: ; %bb.0: ; %entry 20303; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 20304; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20305; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 20306; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 20307; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 20308; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20309; GFX10-CU-NEXT: s_mov_b32 s6, s4 20310; GFX10-CU-NEXT: s_mov_b32 s7, s5 20311; GFX10-CU-NEXT: s_mov_b32 s11, s12 20312; GFX10-CU-NEXT: s_mov_b32 s10, s13 20313; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 20314; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 20315; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20316; GFX10-CU-NEXT: s_mov_b32 s7, s10 20317; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 20318; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 20319; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20320; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 20321; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 20322; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 20323; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20324; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20325; GFX10-CU-NEXT: buffer_gl1_inv 20326; GFX10-CU-NEXT: buffer_gl0_inv 20327; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 20328; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 20329; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20330; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 20331; 
GFX10-CU-NEXT: s_endpgm 20332; 20333; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: 20334; SKIP-CACHE-INV: ; %bb.0: ; %entry 20335; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 20336; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 20337; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 20338; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 20339; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 20340; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20341; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 20342; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 20343; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 20344; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 20345; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 20346; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 20347; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 20348; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 20349; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 20350; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 20351; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20352; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 20353; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 20354; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 20355; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20356; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20357; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 20358; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 20359; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20360; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 20361; SKIP-CACHE-INV-NEXT: s_endpgm 20362; 20363; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: 20364; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 20365; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20366; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20367; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20368; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20369; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 
20370; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 20371; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20372; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 20373; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 20374; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 20375; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20376; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 20377; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 20378; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 20379; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20380; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 20381; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 20382; 20383; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: 20384; GFX90A-TGSPLIT: ; %bb.0: ; %entry 20385; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20386; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20387; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20388; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20389; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20390; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 20391; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20392; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 20393; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 20394; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 20395; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20396; GFX90A-TGSPLIT-NEXT: buffer_invl2 20397; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 20398; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 20399; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 20400; GFX90A-TGSPLIT-NEXT: s_endpgm 20401; 20402; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: 20403; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 20404; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20405; 
GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20406; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20407; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20408; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20409; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 20410; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20411; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 20412; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 20413; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 20414; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20415; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 20416; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 20417; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20418; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 20419; GFX940-NOTTGSPLIT-NEXT: s_endpgm 20420; 20421; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: 20422; GFX940-TGSPLIT: ; %bb.0: ; %entry 20423; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20424; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20425; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20426; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20427; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20428; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 20429; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20430; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 20431; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 20432; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 20433; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20434; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 20435; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 20436; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 20437; GFX940-TGSPLIT-NEXT: s_endpgm 20438; 20439; GFX11-WGP-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: 20440; GFX11-WGP: ; %bb.0: ; %entry 20441; 
GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20442; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20443; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20444; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 20445; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 20446; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 20447; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20448; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 20449; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 20450; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 20451; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 20452; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20453; GFX11-WGP-NEXT: buffer_gl1_inv 20454; GFX11-WGP-NEXT: buffer_gl0_inv 20455; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 20456; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 20457; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 20458; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 20459; GFX11-WGP-NEXT: s_endpgm 20460; 20461; GFX11-CU-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: 20462; GFX11-CU: ; %bb.0: ; %entry 20463; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20464; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20465; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20466; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 20467; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 20468; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 20469; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20470; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 20471; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 20472; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 20473; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 20474; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20475; GFX11-CU-NEXT: buffer_gl1_inv 20476; GFX11-CU-NEXT: buffer_gl0_inv 20477; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 20478; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 20479; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 20480; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 20481; GFX11-CU-NEXT: s_endpgm 20482; 20483; GFX12-WGP-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: 
20484; GFX12-WGP: ; %bb.0: ; %entry 20485; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20486; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20487; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20488; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 20489; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 20490; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 20491; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20492; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 20493; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 20494; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 20495; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 20496; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20497; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 20498; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 20499; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 20500; GFX12-WGP-NEXT: s_wait_dscnt 0x0 20501; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 20502; GFX12-WGP-NEXT: s_endpgm 20503; 20504; GFX12-CU-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: 20505; GFX12-CU: ; %bb.0: ; %entry 20506; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20507; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20508; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20509; GFX12-CU-NEXT: s_wait_kmcnt 0x0 20510; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 20511; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 20512; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20513; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 20514; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 20515; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 20516; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 20517; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20518; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 20519; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 20520; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 20521; GFX12-CU-NEXT: s_wait_dscnt 0x0 20522; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 20523; GFX12-CU-NEXT: s_endpgm 20524 ptr %out, i32 %in, i32 %old) { 
20525entry: 20526 %gep = getelementptr i32, ptr %out, i32 4 20527 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire 20528 %val0 = extractvalue { i32, i1 } %val, 0 20529 store i32 %val0, ptr %out, align 4 20530 ret void 20531} 20532 20533define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( 20534; GFX7-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: 20535; GFX7: ; %bb.0: ; %entry 20536; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 20537; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20538; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 20539; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 20540; GFX7-NEXT: s_mov_b64 s[12:13], 16 20541; GFX7-NEXT: s_waitcnt lgkmcnt(0) 20542; GFX7-NEXT: s_mov_b32 s6, s4 20543; GFX7-NEXT: s_mov_b32 s7, s5 20544; GFX7-NEXT: s_mov_b32 s11, s12 20545; GFX7-NEXT: s_mov_b32 s10, s13 20546; GFX7-NEXT: s_add_u32 s6, s6, s11 20547; GFX7-NEXT: s_addc_u32 s10, s7, s10 20548; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20549; GFX7-NEXT: s_mov_b32 s7, s10 20550; GFX7-NEXT: v_mov_b32_e32 v2, s9 20551; GFX7-NEXT: v_mov_b32_e32 v0, s8 20552; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20553; GFX7-NEXT: v_mov_b32_e32 v3, v0 20554; GFX7-NEXT: v_mov_b32_e32 v0, s6 20555; GFX7-NEXT: v_mov_b32_e32 v1, s7 20556; GFX7-NEXT: s_waitcnt vmcnt(0) 20557; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20558; GFX7-NEXT: s_waitcnt vmcnt(0) 20559; GFX7-NEXT: buffer_wbinvl1_vol 20560; GFX7-NEXT: v_mov_b32_e32 v0, s4 20561; GFX7-NEXT: v_mov_b32_e32 v1, s5 20562; GFX7-NEXT: s_waitcnt lgkmcnt(0) 20563; GFX7-NEXT: flat_store_dword v[0:1], v2 20564; GFX7-NEXT: s_endpgm 20565; 20566; GFX10-WGP-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: 20567; GFX10-WGP: ; %bb.0: ; %entry 20568; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 20569; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20570; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 20571; GFX10-WGP-NEXT: s_load_dword s8, 
s[6:7], 0xc 20572; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 20573; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 20574; GFX10-WGP-NEXT: s_mov_b32 s6, s4 20575; GFX10-WGP-NEXT: s_mov_b32 s7, s5 20576; GFX10-WGP-NEXT: s_mov_b32 s11, s12 20577; GFX10-WGP-NEXT: s_mov_b32 s10, s13 20578; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 20579; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 20580; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20581; GFX10-WGP-NEXT: s_mov_b32 s7, s10 20582; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 20583; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 20584; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20585; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 20586; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 20587; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 20588; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20589; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20590; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20591; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20592; GFX10-WGP-NEXT: buffer_gl1_inv 20593; GFX10-WGP-NEXT: buffer_gl0_inv 20594; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 20595; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 20596; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 20597; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 20598; GFX10-WGP-NEXT: s_endpgm 20599; 20600; GFX10-CU-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: 20601; GFX10-CU: ; %bb.0: ; %entry 20602; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 20603; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20604; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 20605; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 20606; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 20607; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20608; GFX10-CU-NEXT: s_mov_b32 s6, s4 20609; GFX10-CU-NEXT: s_mov_b32 s7, s5 20610; GFX10-CU-NEXT: s_mov_b32 s11, s12 20611; GFX10-CU-NEXT: s_mov_b32 s10, s13 20612; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 20613; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 20614; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 
20615; GFX10-CU-NEXT: s_mov_b32 s7, s10 20616; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 20617; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 20618; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20619; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 20620; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 20621; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 20622; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20623; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 20624; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20625; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20626; GFX10-CU-NEXT: buffer_gl1_inv 20627; GFX10-CU-NEXT: buffer_gl0_inv 20628; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 20629; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 20630; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20631; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 20632; GFX10-CU-NEXT: s_endpgm 20633; 20634; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: 20635; SKIP-CACHE-INV: ; %bb.0: ; %entry 20636; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 20637; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 20638; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 20639; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 20640; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 20641; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20642; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 20643; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 20644; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 20645; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 20646; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 20647; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 20648; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 20649; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 20650; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 20651; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 20652; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20653; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 20654; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 20655; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v1, s3 20656; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20657; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20658; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20659; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 20660; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 20661; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20662; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 20663; SKIP-CACHE-INV-NEXT: s_endpgm 20664; 20665; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: 20666; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 20667; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20668; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20669; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20670; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20671; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20672; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 20673; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20674; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 20675; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 20676; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 20677; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20678; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 20679; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20680; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 20681; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 20682; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 20683; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20684; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 20685; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 20686; 20687; GFX90A-TGSPLIT-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: 20688; GFX90A-TGSPLIT: ; %bb.0: ; %entry 20689; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20690; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20691; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20692; 
GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20693; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 20694; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 20695; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20696; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 20697; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 20698; GFX90A-TGSPLIT-NEXT: buffer_wbl2 20699; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20700; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 20701; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20702; GFX90A-TGSPLIT-NEXT: buffer_invl2 20703; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 20704; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 20705; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 20706; GFX90A-TGSPLIT-NEXT: s_endpgm 20707; 20708; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: 20709; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 20710; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20711; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20712; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20713; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20714; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20715; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 20716; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20717; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 20718; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 20719; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 20720; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20721; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 20722; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 20723; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 20724; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 20725; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20726; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 20727; 
GFX940-NOTTGSPLIT-NEXT: s_endpgm 20728; 20729; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: 20730; GFX940-TGSPLIT: ; %bb.0: ; %entry 20731; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 20732; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 20733; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 20734; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20735; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 20736; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 20737; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20738; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 20739; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 20740; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 20741; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20742; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 20743; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 20744; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 20745; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 20746; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 20747; GFX940-TGSPLIT-NEXT: s_endpgm 20748; 20749; GFX11-WGP-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: 20750; GFX11-WGP: ; %bb.0: ; %entry 20751; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20752; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20753; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20754; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 20755; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 20756; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 20757; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20758; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 20759; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 20760; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 20761; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20762; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20763; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 20764; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 20765; GFX11-WGP-NEXT: buffer_gl1_inv 20766; 
GFX11-WGP-NEXT: buffer_gl0_inv 20767; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 20768; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 20769; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 20770; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 20771; GFX11-WGP-NEXT: s_endpgm 20772; 20773; GFX11-CU-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: 20774; GFX11-CU: ; %bb.0: ; %entry 20775; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20776; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20777; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20778; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 20779; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 20780; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 20781; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20782; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 20783; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 20784; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 20785; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20786; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 20787; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 20788; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 20789; GFX11-CU-NEXT: buffer_gl1_inv 20790; GFX11-CU-NEXT: buffer_gl0_inv 20791; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 20792; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 20793; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 20794; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 20795; GFX11-CU-NEXT: s_endpgm 20796; 20797; GFX12-WGP-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: 20798; GFX12-WGP: ; %bb.0: ; %entry 20799; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20800; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 20801; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 20802; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 20803; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 20804; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 20805; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20806; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 20807; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 20808; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 20809; GFX12-WGP-NEXT: global_wb 
scope:SCOPE_SYS 20810; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20811; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20812; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20813; GFX12-WGP-NEXT: s_wait_storecnt 0x0 20814; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 20815; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 20816; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 20817; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 20818; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 20819; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 20820; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 20821; GFX12-WGP-NEXT: s_wait_dscnt 0x0 20822; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 20823; GFX12-WGP-NEXT: s_endpgm 20824; 20825; GFX12-CU-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: 20826; GFX12-CU: ; %bb.0: ; %entry 20827; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 20828; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 20829; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 20830; GFX12-CU-NEXT: s_wait_kmcnt 0x0 20831; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 20832; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 20833; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20834; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 20835; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 20836; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 20837; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 20838; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 20839; GFX12-CU-NEXT: s_wait_samplecnt 0x0 20840; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20841; GFX12-CU-NEXT: s_wait_storecnt 0x0 20842; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 20843; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 20844; GFX12-CU-NEXT: s_wait_samplecnt 0x0 20845; GFX12-CU-NEXT: s_wait_loadcnt 0x0 20846; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 20847; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 20848; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 20849; GFX12-CU-NEXT: s_wait_dscnt 0x0 20850; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 20851; GFX12-CU-NEXT: s_endpgm 20852 ptr 
%out, i32 %in, i32 %old) { 20853entry: 20854 %gep = getelementptr i32, ptr %out, i32 4 20855 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release acquire 20856 %val0 = extractvalue { i32, i1 } %val, 0 20857 store i32 %val0, ptr %out, align 4 20858 ret void 20859} 20860
; Test kernel @flat_system_one_as_acq_rel_acquire_ret_cmpxchg.
; Its IR body (after the assertions) runs a volatile cmpxchg with
; syncscope("one-as"), success ordering acq_rel and failure ordering acquire,
; on the i32 located 4 elements (16 bytes) past %out. It then extracts field 0
; of the result pair (the value that was loaded) and stores it through %out,
; so the returning form of the atomic is exercised.
; The per-target assertions that follow are autogenerated by
; utils/update_llc_test_checks.py; regenerate them with that script instead of
; editing them by hand.
20861define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( 20862; GFX7-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: 20863; GFX7: ; %bb.0: ; %entry 20864; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 20865; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20866; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 20867; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 20868; GFX7-NEXT: s_mov_b64 s[12:13], 16 20869; GFX7-NEXT: s_waitcnt lgkmcnt(0) 20870; GFX7-NEXT: s_mov_b32 s6, s4 20871; GFX7-NEXT: s_mov_b32 s7, s5 20872; GFX7-NEXT: s_mov_b32 s11, s12 20873; GFX7-NEXT: s_mov_b32 s10, s13 20874; GFX7-NEXT: s_add_u32 s6, s6, s11 20875; GFX7-NEXT: s_addc_u32 s10, s7, s10 20876; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20877; GFX7-NEXT: s_mov_b32 s7, s10 20878; GFX7-NEXT: v_mov_b32_e32 v2, s9 20879; GFX7-NEXT: v_mov_b32_e32 v0, s8 20880; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20881; GFX7-NEXT: v_mov_b32_e32 v3, v0 20882; GFX7-NEXT: v_mov_b32_e32 v0, s6 20883; GFX7-NEXT: v_mov_b32_e32 v1, s7 20884; GFX7-NEXT: s_waitcnt vmcnt(0) 20885; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20886; GFX7-NEXT: s_waitcnt vmcnt(0) 20887; GFX7-NEXT: buffer_wbinvl1_vol 20888; GFX7-NEXT: v_mov_b32_e32 v0, s4 20889; GFX7-NEXT: v_mov_b32_e32 v1, s5 20890; GFX7-NEXT: s_waitcnt lgkmcnt(0) 20891; GFX7-NEXT: flat_store_dword v[0:1], v2 20892; GFX7-NEXT: s_endpgm 20893; 20894; GFX10-WGP-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: 20895; GFX10-WGP: ; %bb.0: ; %entry 20896; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 20897; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20898; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 20899; 
GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 20900; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 20901; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 20902; GFX10-WGP-NEXT: s_mov_b32 s6, s4 20903; GFX10-WGP-NEXT: s_mov_b32 s7, s5 20904; GFX10-WGP-NEXT: s_mov_b32 s11, s12 20905; GFX10-WGP-NEXT: s_mov_b32 s10, s13 20906; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 20907; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 20908; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 20909; GFX10-WGP-NEXT: s_mov_b32 s7, s10 20910; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 20911; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 20912; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20913; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 20914; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 20915; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 20916; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20917; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 20918; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20919; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 20920; GFX10-WGP-NEXT: buffer_gl1_inv 20921; GFX10-WGP-NEXT: buffer_gl0_inv 20922; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 20923; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 20924; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 20925; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 20926; GFX10-WGP-NEXT: s_endpgm 20927; 20928; GFX10-CU-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: 20929; GFX10-CU: ; %bb.0: ; %entry 20930; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 20931; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 20932; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 20933; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 20934; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 20935; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20936; GFX10-CU-NEXT: s_mov_b32 s6, s4 20937; GFX10-CU-NEXT: s_mov_b32 s7, s5 20938; GFX10-CU-NEXT: s_mov_b32 s11, s12 20939; GFX10-CU-NEXT: s_mov_b32 s10, s13 20940; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 20941; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 20942; GFX10-CU-NEXT: ; kill: def $sgpr6 
killed $sgpr6 def $sgpr6_sgpr7 20943; GFX10-CU-NEXT: s_mov_b32 s7, s10 20944; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 20945; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 20946; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20947; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 20948; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 20949; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 20950; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20951; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 20952; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20953; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 20954; GFX10-CU-NEXT: buffer_gl1_inv 20955; GFX10-CU-NEXT: buffer_gl0_inv 20956; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 20957; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 20958; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 20959; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 20960; GFX10-CU-NEXT: s_endpgm 20961; 20962; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: 20963; SKIP-CACHE-INV: ; %bb.0: ; %entry 20964; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 20965; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 20966; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 20967; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 20968; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 20969; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20970; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 20971; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 20972; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 20973; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 20974; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 20975; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 20976; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 20977; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 20978; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 20979; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 20980; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 20981; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 20982; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 20983; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 20984; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20985; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 20986; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 20987; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 20988; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 20989; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 20990; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 20991; SKIP-CACHE-INV-NEXT: s_endpgm 20992; 20993; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: 20994; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 20995; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 20996; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 20997; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 20998; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 20999; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 21000; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 21001; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21002; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 21003; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21004; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 21005; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21006; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 21007; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21008; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 21009; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 21010; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21011; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21012; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 21013; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 21014; 21015; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: 21016; GFX90A-TGSPLIT: ; %bb.0: ; %entry 21017; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21018; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 21019; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 
21020; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21021; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 21022; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 21023; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21024; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 21025; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21026; GFX90A-TGSPLIT-NEXT: buffer_wbl2 21027; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21028; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 21029; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21030; GFX90A-TGSPLIT-NEXT: buffer_invl2 21031; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 21032; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21033; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 21034; GFX90A-TGSPLIT-NEXT: s_endpgm 21035; 21036; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: 21037; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 21038; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 21039; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 21040; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 21041; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21042; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 21043; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 21044; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21045; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 21046; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 21047; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 21048; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21049; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 21050; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21051; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 21052; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 21053; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21054; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 21055; 
GFX940-NOTTGSPLIT-NEXT: s_endpgm 21056; 21057; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: 21058; GFX940-TGSPLIT: ; %bb.0: ; %entry 21059; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 21060; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 21061; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 21062; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21063; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 21064; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 21065; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21066; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 21067; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 21068; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 21069; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21070; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 21071; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21072; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 21073; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 21074; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 21075; GFX940-TGSPLIT-NEXT: s_endpgm 21076; 21077; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: 21078; GFX11-WGP: ; %bb.0: ; %entry 21079; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21080; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 21081; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 21082; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 21083; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 21084; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 21085; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21086; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 21087; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 21088; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 21089; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 21090; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 21091; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 21092; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 21093; GFX11-WGP-NEXT: buffer_gl1_inv 21094; 
GFX11-WGP-NEXT: buffer_gl0_inv 21095; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 21096; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 21097; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 21098; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 21099; GFX11-WGP-NEXT: s_endpgm 21100; 21101; GFX11-CU-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: 21102; GFX11-CU: ; %bb.0: ; %entry 21103; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21104; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 21105; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 21106; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 21107; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 21108; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 21109; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21110; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 21111; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 21112; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 21113; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 21114; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 21115; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 21116; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 21117; GFX11-CU-NEXT: buffer_gl1_inv 21118; GFX11-CU-NEXT: buffer_gl0_inv 21119; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 21120; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 21121; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 21122; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 21123; GFX11-CU-NEXT: s_endpgm 21124; 21125; GFX12-WGP-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: 21126; GFX12-WGP: ; %bb.0: ; %entry 21127; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21128; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 21129; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 21130; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 21131; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 21132; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 21133; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21134; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 21135; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 21136; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 21137; GFX12-WGP-NEXT: global_wb 
scope:SCOPE_SYS 21138; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 21139; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 21140; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 21141; GFX12-WGP-NEXT: s_wait_storecnt 0x0 21142; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 21143; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 21144; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 21145; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 21146; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 21147; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 21148; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 21149; GFX12-WGP-NEXT: s_wait_dscnt 0x0 21150; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 21151; GFX12-WGP-NEXT: s_endpgm 21152; 21153; GFX12-CU-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: 21154; GFX12-CU: ; %bb.0: ; %entry 21155; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21156; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 21157; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 21158; GFX12-CU-NEXT: s_wait_kmcnt 0x0 21159; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 21160; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 21161; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21162; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 21163; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 21164; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 21165; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 21166; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 21167; GFX12-CU-NEXT: s_wait_samplecnt 0x0 21168; GFX12-CU-NEXT: s_wait_loadcnt 0x0 21169; GFX12-CU-NEXT: s_wait_storecnt 0x0 21170; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 21171; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 21172; GFX12-CU-NEXT: s_wait_samplecnt 0x0 21173; GFX12-CU-NEXT: s_wait_loadcnt 0x0 21174; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 21175; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 21176; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 21177; GFX12-CU-NEXT: s_wait_dscnt 0x0 21178; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 21179; GFX12-CU-NEXT: s_endpgm 21180 ptr 
%out, i32 %in, i32 %old) { 21181entry: 21182 %gep = getelementptr i32, ptr %out, i32 4 21183 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire 21184 %val0 = extractvalue { i32, i1 } %val, 0 21185 store i32 %val0, ptr %out, align 4 21186 ret void 21187} 21188 21189define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( 21190; GFX7-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: 21191; GFX7: ; %bb.0: ; %entry 21192; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 21193; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 21194; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 21195; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 21196; GFX7-NEXT: s_mov_b64 s[12:13], 16 21197; GFX7-NEXT: s_waitcnt lgkmcnt(0) 21198; GFX7-NEXT: s_mov_b32 s6, s4 21199; GFX7-NEXT: s_mov_b32 s7, s5 21200; GFX7-NEXT: s_mov_b32 s11, s12 21201; GFX7-NEXT: s_mov_b32 s10, s13 21202; GFX7-NEXT: s_add_u32 s6, s6, s11 21203; GFX7-NEXT: s_addc_u32 s10, s7, s10 21204; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 21205; GFX7-NEXT: s_mov_b32 s7, s10 21206; GFX7-NEXT: v_mov_b32_e32 v2, s9 21207; GFX7-NEXT: v_mov_b32_e32 v0, s8 21208; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21209; GFX7-NEXT: v_mov_b32_e32 v3, v0 21210; GFX7-NEXT: v_mov_b32_e32 v0, s6 21211; GFX7-NEXT: v_mov_b32_e32 v1, s7 21212; GFX7-NEXT: s_waitcnt vmcnt(0) 21213; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21214; GFX7-NEXT: s_waitcnt vmcnt(0) 21215; GFX7-NEXT: buffer_wbinvl1_vol 21216; GFX7-NEXT: v_mov_b32_e32 v0, s4 21217; GFX7-NEXT: v_mov_b32_e32 v1, s5 21218; GFX7-NEXT: s_waitcnt lgkmcnt(0) 21219; GFX7-NEXT: flat_store_dword v[0:1], v2 21220; GFX7-NEXT: s_endpgm 21221; 21222; GFX10-WGP-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: 21223; GFX10-WGP: ; %bb.0: ; %entry 21224; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 21225; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 21226; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 21227; 
GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 21228; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 21229; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 21230; GFX10-WGP-NEXT: s_mov_b32 s6, s4 21231; GFX10-WGP-NEXT: s_mov_b32 s7, s5 21232; GFX10-WGP-NEXT: s_mov_b32 s11, s12 21233; GFX10-WGP-NEXT: s_mov_b32 s10, s13 21234; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 21235; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 21236; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 21237; GFX10-WGP-NEXT: s_mov_b32 s7, s10 21238; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 21239; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 21240; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21241; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 21242; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 21243; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 21244; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 21245; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 21246; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21247; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 21248; GFX10-WGP-NEXT: buffer_gl1_inv 21249; GFX10-WGP-NEXT: buffer_gl0_inv 21250; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 21251; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 21252; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 21253; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 21254; GFX10-WGP-NEXT: s_endpgm 21255; 21256; GFX10-CU-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: 21257; GFX10-CU: ; %bb.0: ; %entry 21258; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 21259; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 21260; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 21261; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 21262; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 21263; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 21264; GFX10-CU-NEXT: s_mov_b32 s6, s4 21265; GFX10-CU-NEXT: s_mov_b32 s7, s5 21266; GFX10-CU-NEXT: s_mov_b32 s11, s12 21267; GFX10-CU-NEXT: s_mov_b32 s10, s13 21268; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 21269; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 21270; GFX10-CU-NEXT: ; kill: def $sgpr6 
killed $sgpr6 def $sgpr6_sgpr7 21271; GFX10-CU-NEXT: s_mov_b32 s7, s10 21272; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 21273; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 21274; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21275; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 21276; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 21277; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 21278; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 21279; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 21280; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21281; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 21282; GFX10-CU-NEXT: buffer_gl1_inv 21283; GFX10-CU-NEXT: buffer_gl0_inv 21284; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 21285; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 21286; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 21287; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 21288; GFX10-CU-NEXT: s_endpgm 21289; 21290; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: 21291; SKIP-CACHE-INV: ; %bb.0: ; %entry 21292; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 21293; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 21294; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 21295; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 21296; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 21297; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 21298; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 21299; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 21300; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 21301; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 21302; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 21303; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 21304; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 21305; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 21306; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 21307; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 21308; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21309; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 21310; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 21311; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 21312; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 21313; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21314; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 21315; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 21316; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 21317; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 21318; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 21319; SKIP-CACHE-INV-NEXT: s_endpgm 21320; 21321; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: 21322; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 21323; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21324; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 21325; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 21326; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21327; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 21328; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 21329; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21330; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 21331; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21332; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 21333; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21334; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 21335; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21336; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 21337; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 21338; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21339; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21340; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 21341; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 21342; 21343; GFX90A-TGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: 21344; GFX90A-TGSPLIT: ; %bb.0: ; %entry 21345; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21346; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 21347; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 
21348; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21349; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 21350; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 21351; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21352; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 21353; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21354; GFX90A-TGSPLIT-NEXT: buffer_wbl2 21355; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21356; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 21357; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21358; GFX90A-TGSPLIT-NEXT: buffer_invl2 21359; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 21360; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21361; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 21362; GFX90A-TGSPLIT-NEXT: s_endpgm 21363; 21364; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: 21365; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 21366; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 21367; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 21368; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 21369; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21370; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 21371; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 21372; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21373; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 21374; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 21375; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 21376; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21377; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 21378; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21379; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 21380; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 21381; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21382; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 21383; 
GFX940-NOTTGSPLIT-NEXT: s_endpgm 21384; 21385; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: 21386; GFX940-TGSPLIT: ; %bb.0: ; %entry 21387; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 21388; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 21389; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 21390; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21391; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 21392; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 21393; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21394; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 21395; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 21396; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 21397; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21398; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 21399; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21400; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 21401; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 21402; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 21403; GFX940-TGSPLIT-NEXT: s_endpgm 21404; 21405; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: 21406; GFX11-WGP: ; %bb.0: ; %entry 21407; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21408; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 21409; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 21410; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 21411; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 21412; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 21413; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21414; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 21415; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 21416; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 21417; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 21418; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 21419; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 21420; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 21421; GFX11-WGP-NEXT: buffer_gl1_inv 21422; 
GFX11-WGP-NEXT: buffer_gl0_inv 21423; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 21424; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 21425; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 21426; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 21427; GFX11-WGP-NEXT: s_endpgm 21428; 21429; GFX11-CU-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: 21430; GFX11-CU: ; %bb.0: ; %entry 21431; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21432; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 21433; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 21434; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 21435; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 21436; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 21437; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21438; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 21439; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 21440; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 21441; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 21442; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 21443; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 21444; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 21445; GFX11-CU-NEXT: buffer_gl1_inv 21446; GFX11-CU-NEXT: buffer_gl0_inv 21447; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 21448; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 21449; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 21450; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 21451; GFX11-CU-NEXT: s_endpgm 21452; 21453; GFX12-WGP-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: 21454; GFX12-WGP: ; %bb.0: ; %entry 21455; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21456; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 21457; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 21458; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 21459; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 21460; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 21461; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21462; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 21463; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 21464; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 21465; GFX12-WGP-NEXT: global_wb 
scope:SCOPE_SYS 21466; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 21467; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 21468; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 21469; GFX12-WGP-NEXT: s_wait_storecnt 0x0 21470; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 21471; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 21472; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 21473; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 21474; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 21475; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 21476; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 21477; GFX12-WGP-NEXT: s_wait_dscnt 0x0 21478; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 21479; GFX12-WGP-NEXT: s_endpgm 21480; 21481; GFX12-CU-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: 21482; GFX12-CU: ; %bb.0: ; %entry 21483; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21484; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 21485; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 21486; GFX12-CU-NEXT: s_wait_kmcnt 0x0 21487; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 21488; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 21489; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21490; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 21491; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 21492; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 21493; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 21494; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 21495; GFX12-CU-NEXT: s_wait_samplecnt 0x0 21496; GFX12-CU-NEXT: s_wait_loadcnt 0x0 21497; GFX12-CU-NEXT: s_wait_storecnt 0x0 21498; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 21499; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 21500; GFX12-CU-NEXT: s_wait_samplecnt 0x0 21501; GFX12-CU-NEXT: s_wait_loadcnt 0x0 21502; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 21503; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 21504; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 21505; GFX12-CU-NEXT: s_wait_dscnt 0x0 21506; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 21507; GFX12-CU-NEXT: s_endpgm 21508 ptr 
%out, i32 %in, i32 %old) { 21509entry: 21510 %gep = getelementptr i32, ptr %out, i32 4 21511 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire 21512 %val0 = extractvalue { i32, i1 } %val, 0 21513 store i32 %val0, ptr %out, align 4 21514 ret void 21515} 21516
; Test kernel: volatile i32 cmpxchg through a generic (flat) pointer with
; syncscope("one-as"), success ordering monotonic and failure ordering seq_cst.
; The exchange targets %out + 4 x i32 (byte offset 16); the loaded value
; (field 0 of the result pair) is then stored to %out.
; The per-prefix assertion lines that follow are autogenerated (see the note on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
21517define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg( 21518; GFX7-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: 21519; GFX7: ; %bb.0: ; %entry 21520; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 21521; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 21522; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 21523; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 21524; GFX7-NEXT: s_mov_b64 s[12:13], 16 21525; GFX7-NEXT: s_waitcnt lgkmcnt(0) 21526; GFX7-NEXT: s_mov_b32 s6, s4 21527; GFX7-NEXT: s_mov_b32 s7, s5 21528; GFX7-NEXT: s_mov_b32 s11, s12 21529; GFX7-NEXT: s_mov_b32 s10, s13 21530; GFX7-NEXT: s_add_u32 s6, s6, s11 21531; GFX7-NEXT: s_addc_u32 s10, s7, s10 21532; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 21533; GFX7-NEXT: s_mov_b32 s7, s10 21534; GFX7-NEXT: v_mov_b32_e32 v2, s9 21535; GFX7-NEXT: v_mov_b32_e32 v0, s8 21536; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21537; GFX7-NEXT: v_mov_b32_e32 v3, v0 21538; GFX7-NEXT: v_mov_b32_e32 v0, s6 21539; GFX7-NEXT: v_mov_b32_e32 v1, s7 21540; GFX7-NEXT: s_waitcnt vmcnt(0) 21541; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21542; GFX7-NEXT: s_waitcnt vmcnt(0) 21543; GFX7-NEXT: buffer_wbinvl1_vol 21544; GFX7-NEXT: v_mov_b32_e32 v0, s4 21545; GFX7-NEXT: v_mov_b32_e32 v1, s5 21546; GFX7-NEXT: s_waitcnt lgkmcnt(0) 21547; GFX7-NEXT: flat_store_dword v[0:1], v2 21548; GFX7-NEXT: s_endpgm 21549; 21550; GFX10-WGP-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: 21551; GFX10-WGP: ; %bb.0: ; %entry 21552; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 21553; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 21554; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8 21555; 
GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 21556; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 21557; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 21558; GFX10-WGP-NEXT: s_mov_b32 s6, s4 21559; GFX10-WGP-NEXT: s_mov_b32 s7, s5 21560; GFX10-WGP-NEXT: s_mov_b32 s11, s12 21561; GFX10-WGP-NEXT: s_mov_b32 s10, s13 21562; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 21563; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 21564; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 21565; GFX10-WGP-NEXT: s_mov_b32 s7, s10 21566; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 21567; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 21568; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21569; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 21570; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 21571; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 21572; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 21573; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 21574; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21575; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 21576; GFX10-WGP-NEXT: buffer_gl1_inv 21577; GFX10-WGP-NEXT: buffer_gl0_inv 21578; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 21579; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 21580; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 21581; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 21582; GFX10-WGP-NEXT: s_endpgm 21583; 21584; GFX10-CU-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: 21585; GFX10-CU: ; %bb.0: ; %entry 21586; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 21587; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 21588; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 21589; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 21590; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 21591; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 21592; GFX10-CU-NEXT: s_mov_b32 s6, s4 21593; GFX10-CU-NEXT: s_mov_b32 s7, s5 21594; GFX10-CU-NEXT: s_mov_b32 s11, s12 21595; GFX10-CU-NEXT: s_mov_b32 s10, s13 21596; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 21597; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 21598; GFX10-CU-NEXT: ; kill: def 
$sgpr6 killed $sgpr6 def $sgpr6_sgpr7 21599; GFX10-CU-NEXT: s_mov_b32 s7, s10 21600; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 21601; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 21602; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21603; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 21604; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 21605; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 21606; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 21607; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 21608; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21609; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 21610; GFX10-CU-NEXT: buffer_gl1_inv 21611; GFX10-CU-NEXT: buffer_gl0_inv 21612; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 21613; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 21614; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 21615; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 21616; GFX10-CU-NEXT: s_endpgm 21617; 21618; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: 21619; SKIP-CACHE-INV: ; %bb.0: ; %entry 21620; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 21621; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 21622; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 21623; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 21624; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 21625; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 21626; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 21627; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 21628; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 21629; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 21630; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 21631; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 21632; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 21633; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 21634; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 21635; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 21636; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21637; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 21638; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, 
s2 21639; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 21640; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 21641; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21642; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 21643; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 21644; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 21645; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 21646; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 21647; SKIP-CACHE-INV-NEXT: s_endpgm 21648; 21649; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: 21650; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 21651; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21652; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 21653; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 21654; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21655; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 21656; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 21657; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21658; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 21659; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21660; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 21661; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21662; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 21663; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21664; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 21665; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 21666; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21667; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21668; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 21669; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 21670; 21671; GFX90A-TGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: 21672; GFX90A-TGSPLIT: ; %bb.0: ; %entry 21673; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21674; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 21675; GFX90A-TGSPLIT-NEXT: s_load_dword 
s6, s[8:9], 0xc 21676; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21677; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 21678; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 21679; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21680; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 21681; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21682; GFX90A-TGSPLIT-NEXT: buffer_wbl2 21683; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21684; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 21685; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21686; GFX90A-TGSPLIT-NEXT: buffer_invl2 21687; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 21688; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21689; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 21690; GFX90A-TGSPLIT-NEXT: s_endpgm 21691; 21692; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: 21693; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 21694; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 21695; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 21696; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 21697; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21698; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 21699; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 21700; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21701; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 21702; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 21703; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 21704; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21705; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 21706; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21707; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 21708; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 21709; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21710; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 
sc1 21711; GFX940-NOTTGSPLIT-NEXT: s_endpgm 21712; 21713; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: 21714; GFX940-TGSPLIT: ; %bb.0: ; %entry 21715; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 21716; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 21717; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 21718; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21719; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 21720; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 21721; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21722; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 21723; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 21724; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 21725; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21726; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 21727; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 21728; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 21729; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 21730; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 21731; GFX940-TGSPLIT-NEXT: s_endpgm 21732; 21733; GFX11-WGP-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: 21734; GFX11-WGP: ; %bb.0: ; %entry 21735; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21736; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 21737; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 21738; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 21739; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 21740; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 21741; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21742; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 21743; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 21744; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 21745; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 21746; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 21747; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 21748; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 21749; GFX11-WGP-NEXT: buffer_gl1_inv 
21750; GFX11-WGP-NEXT: buffer_gl0_inv 21751; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 21752; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 21753; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 21754; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 21755; GFX11-WGP-NEXT: s_endpgm 21756; 21757; GFX11-CU-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: 21758; GFX11-CU: ; %bb.0: ; %entry 21759; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21760; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 21761; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 21762; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 21763; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 21764; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 21765; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21766; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 21767; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 21768; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 21769; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 21770; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 21771; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 21772; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 21773; GFX11-CU-NEXT: buffer_gl1_inv 21774; GFX11-CU-NEXT: buffer_gl0_inv 21775; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 21776; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 21777; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 21778; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 21779; GFX11-CU-NEXT: s_endpgm 21780; 21781; GFX12-WGP-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: 21782; GFX12-WGP: ; %bb.0: ; %entry 21783; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21784; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 21785; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 21786; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 21787; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 21788; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 21789; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21790; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 21791; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 21792; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 21793; GFX12-WGP-NEXT: 
global_wb scope:SCOPE_SYS 21794; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 21795; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 21796; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 21797; GFX12-WGP-NEXT: s_wait_storecnt 0x0 21798; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 21799; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 21800; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 21801; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 21802; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 21803; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 21804; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 21805; GFX12-WGP-NEXT: s_wait_dscnt 0x0 21806; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 21807; GFX12-WGP-NEXT: s_endpgm 21808; 21809; GFX12-CU-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: 21810; GFX12-CU: ; %bb.0: ; %entry 21811; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 21812; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 21813; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 21814; GFX12-CU-NEXT: s_wait_kmcnt 0x0 21815; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 21816; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 21817; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21818; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 21819; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 21820; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 21821; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 21822; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 21823; GFX12-CU-NEXT: s_wait_samplecnt 0x0 21824; GFX12-CU-NEXT: s_wait_loadcnt 0x0 21825; GFX12-CU-NEXT: s_wait_storecnt 0x0 21826; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 21827; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 21828; GFX12-CU-NEXT: s_wait_samplecnt 0x0 21829; GFX12-CU-NEXT: s_wait_loadcnt 0x0 21830; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 21831; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 21832; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 21833; GFX12-CU-NEXT: s_wait_dscnt 0x0 21834; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 21835; GFX12-CU-NEXT: 
s_endpgm 21836 ptr %out, i32 %in, i32 %old) { 21837entry: 21838 %gep = getelementptr i32, ptr %out, i32 4 21839 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst 21840 %val0 = extractvalue { i32, i1 } %val, 0 21841 store i32 %val0, ptr %out, align 4 21842 ret void 21843} 21844 21845define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg( 21846; GFX7-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: 21847; GFX7: ; %bb.0: ; %entry 21848; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9] 21849; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 21850; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2 21851; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3 21852; GFX7-NEXT: s_mov_b64 s[12:13], 16 21853; GFX7-NEXT: s_waitcnt lgkmcnt(0) 21854; GFX7-NEXT: s_mov_b32 s6, s4 21855; GFX7-NEXT: s_mov_b32 s7, s5 21856; GFX7-NEXT: s_mov_b32 s11, s12 21857; GFX7-NEXT: s_mov_b32 s10, s13 21858; GFX7-NEXT: s_add_u32 s6, s6, s11 21859; GFX7-NEXT: s_addc_u32 s10, s7, s10 21860; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 21861; GFX7-NEXT: s_mov_b32 s7, s10 21862; GFX7-NEXT: v_mov_b32_e32 v2, s9 21863; GFX7-NEXT: v_mov_b32_e32 v0, s8 21864; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21865; GFX7-NEXT: v_mov_b32_e32 v3, v0 21866; GFX7-NEXT: v_mov_b32_e32 v0, s6 21867; GFX7-NEXT: v_mov_b32_e32 v1, s7 21868; GFX7-NEXT: s_waitcnt vmcnt(0) 21869; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21870; GFX7-NEXT: s_waitcnt vmcnt(0) 21871; GFX7-NEXT: buffer_wbinvl1_vol 21872; GFX7-NEXT: v_mov_b32_e32 v0, s4 21873; GFX7-NEXT: v_mov_b32_e32 v1, s5 21874; GFX7-NEXT: s_waitcnt lgkmcnt(0) 21875; GFX7-NEXT: flat_store_dword v[0:1], v2 21876; GFX7-NEXT: s_endpgm 21877; 21878; GFX10-WGP-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: 21879; GFX10-WGP: ; %bb.0: ; %entry 21880; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9] 21881; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 21882; GFX10-WGP-NEXT: s_load_dword s9, 
s[6:7], 0x8 21883; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc 21884; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16 21885; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 21886; GFX10-WGP-NEXT: s_mov_b32 s6, s4 21887; GFX10-WGP-NEXT: s_mov_b32 s7, s5 21888; GFX10-WGP-NEXT: s_mov_b32 s11, s12 21889; GFX10-WGP-NEXT: s_mov_b32 s10, s13 21890; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 21891; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 21892; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 21893; GFX10-WGP-NEXT: s_mov_b32 s7, s10 21894; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 21895; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 21896; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21897; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 21898; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 21899; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 21900; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 21901; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 21902; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21903; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 21904; GFX10-WGP-NEXT: buffer_gl1_inv 21905; GFX10-WGP-NEXT: buffer_gl0_inv 21906; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 21907; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 21908; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 21909; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 21910; GFX10-WGP-NEXT: s_endpgm 21911; 21912; GFX10-CU-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: 21913; GFX10-CU: ; %bb.0: ; %entry 21914; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 21915; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 21916; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 21917; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 21918; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 21919; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 21920; GFX10-CU-NEXT: s_mov_b32 s6, s4 21921; GFX10-CU-NEXT: s_mov_b32 s7, s5 21922; GFX10-CU-NEXT: s_mov_b32 s11, s12 21923; GFX10-CU-NEXT: s_mov_b32 s10, s13 21924; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 21925; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 21926; GFX10-CU-NEXT: 
; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 21927; GFX10-CU-NEXT: s_mov_b32 s7, s10 21928; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 21929; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 21930; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21931; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 21932; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 21933; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 21934; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 21935; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 21936; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21937; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 21938; GFX10-CU-NEXT: buffer_gl1_inv 21939; GFX10-CU-NEXT: buffer_gl0_inv 21940; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 21941; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 21942; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 21943; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 21944; GFX10-CU-NEXT: s_endpgm 21945; 21946; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: 21947; SKIP-CACHE-INV: ; %bb.0: ; %entry 21948; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 21949; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 21950; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 21951; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 21952; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 21953; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 21954; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 21955; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 21956; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 21957; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 21958; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 21959; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 21960; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 21961; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 21962; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 21963; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 21964; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21965; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 21966; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s2 21967; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 21968; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 21969; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 21970; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 21971; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 21972; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 21973; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 21974; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 21975; SKIP-CACHE-INV-NEXT: s_endpgm 21976; 21977; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: 21978; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 21979; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 21980; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 21981; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 21982; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21983; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 21984; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 21985; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 21986; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 21987; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21988; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 21989; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21990; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 21991; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 21992; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 21993; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 21994; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 21995; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 21996; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 21997; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 21998; 21999; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: 22000; GFX90A-TGSPLIT: ; %bb.0: ; %entry 22001; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 22002; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 22003; GFX90A-TGSPLIT-NEXT: 
s_load_dword s6, s[8:9], 0xc 22004; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 22005; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 22006; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 22007; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 22008; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 22009; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 22010; GFX90A-TGSPLIT-NEXT: buffer_wbl2 22011; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 22012; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 22013; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 22014; GFX90A-TGSPLIT-NEXT: buffer_invl2 22015; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 22016; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 22017; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 22018; GFX90A-TGSPLIT-NEXT: s_endpgm 22019; 22020; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: 22021; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 22022; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 22023; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 22024; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 22025; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 22026; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 22027; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 22028; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 22029; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 22030; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 22031; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 22032; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 22033; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 22034; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 22035; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 22036; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 22037; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 22038; GFX940-NOTTGSPLIT-NEXT: flat_store_dword 
v[0:1], v2 sc0 sc1 22039; GFX940-NOTTGSPLIT-NEXT: s_endpgm 22040; 22041; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: 22042; GFX940-TGSPLIT: ; %bb.0: ; %entry 22043; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 22044; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 22045; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 22046; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 22047; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 22048; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 22049; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 22050; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 22051; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 22052; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 22053; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 22054; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 22055; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 22056; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 22057; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 22058; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 22059; GFX940-TGSPLIT-NEXT: s_endpgm 22060; 22061; GFX11-WGP-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: 22062; GFX11-WGP: ; %bb.0: ; %entry 22063; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 22064; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 22065; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 22066; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 22067; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 22068; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 22069; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 22070; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 22071; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 22072; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 22073; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 22074; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 22075; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 22076; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 22077; GFX11-WGP-NEXT: 
buffer_gl1_inv 22078; GFX11-WGP-NEXT: buffer_gl0_inv 22079; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 22080; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 22081; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 22082; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 22083; GFX11-WGP-NEXT: s_endpgm 22084; 22085; GFX11-CU-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: 22086; GFX11-CU: ; %bb.0: ; %entry 22087; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 22088; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 22089; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 22090; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 22091; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 22092; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 22093; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 22094; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 22095; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 22096; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 22097; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 22098; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 22099; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 22100; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 22101; GFX11-CU-NEXT: buffer_gl1_inv 22102; GFX11-CU-NEXT: buffer_gl0_inv 22103; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 22104; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 22105; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 22106; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 22107; GFX11-CU-NEXT: s_endpgm 22108; 22109; GFX12-WGP-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: 22110; GFX12-WGP: ; %bb.0: ; %entry 22111; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 22112; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 22113; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 22114; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 22115; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 22116; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 22117; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 22118; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 22119; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 22120; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 22121; 
GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 22122; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 22123; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 22124; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 22125; GFX12-WGP-NEXT: s_wait_storecnt 0x0 22126; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 22127; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 22128; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 22129; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 22130; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 22131; GFX12-WGP-NEXT: s_wait_dscnt 0x0 22132; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 22133; GFX12-WGP-NEXT: s_endpgm 22134; 22135; GFX12-CU-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: 22136; GFX12-CU: ; %bb.0: ; %entry 22137; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 22138; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 22139; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 22140; GFX12-CU-NEXT: s_wait_kmcnt 0x0 22141; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 22142; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 22143; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 22144; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 22145; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 22146; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 22147; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 22148; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 22149; GFX12-CU-NEXT: s_wait_samplecnt 0x0 22150; GFX12-CU-NEXT: s_wait_loadcnt 0x0 22151; GFX12-CU-NEXT: s_wait_storecnt 0x0 22152; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 22153; GFX12-CU-NEXT: s_wait_loadcnt 0x0 22154; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 22155; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 22156; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 22157; GFX12-CU-NEXT: s_wait_dscnt 0x0 22158; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 22159; GFX12-CU-NEXT: s_endpgm 22160 ptr %out, i32 %in, i32 %old) { 22161entry: 22162 %gep = getelementptr i32, ptr %out, i32 4 22163 %val = cmpxchg volatile ptr %gep, i32 %old, i32 
%in syncscope("one-as") acquire seq_cst
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr %out, align 4
  ret void
}

; Test case: value-returning cmpxchg through a flat (generic) pointer using
; syncscope("one-as"), with success ordering `release` and failure ordering
; `seq_cst`. The exchange targets the i32 located four elements past %out
; (byte offset 16, which shows up in the expected assembly either as
; `offset:16` or as an explicit 64-bit add of 16), and the value the cmpxchg
; read from memory is then stored to %out.
; The per-target assertions that follow are autogenerated (see the note on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg(
; GFX7-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT: s_mov_b64 s[12:13], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, s4
; GFX7-NEXT: s_mov_b32 s7, s5
; GFX7-NEXT: s_mov_b32 s11, s12
; GFX7-NEXT: s_mov_b32 s10, s13
; GFX7-NEXT: s_add_u32 s6, s6, s11
; GFX7-NEXT: s_addc_u32 s10, s7, s10
; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT: s_mov_b32 s7, s10
; GFX7-NEXT: v_mov_b32_e32 v2, s9
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8
; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc
; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_mov_b32 s6, s4
; GFX10-WGP-NEXT: s_mov_b32 s7, s5
; GFX10-WGP-NEXT: s_mov_b32 s11, s12
; GFX10-WGP-NEXT: s_mov_b32 s10, s13
; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11
; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10
; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX10-WGP-NEXT: s_mov_b32 s7, s10
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8
; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8
; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc
; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_mov_b32 s6, s4
; GFX10-CU-NEXT: s_mov_b32 s7, s5
; GFX10-CU-NEXT: s_mov_b32 s11, s12
; GFX10-CU-NEXT: s_mov_b32 s10, s13
; GFX10-CU-NEXT: s_add_u32 s6, s6, s11
; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10
; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX10-CU-NEXT: s_mov_b32 s7, s10
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8
; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: flat_store_dword v[0:1], v2
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7
; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
    ptr %out, i32 %in, i32 %old) {
entry:
  ; Address of the i32 four elements past %out, i.e. %out + 16 bytes.
  %gep = getelementptr i32, ptr %out, i32 4
  ; Compare the memory value against %old and store %in on a match
  ; (success ordering release, failure ordering seq_cst).
  %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst
  ; Field 0 of the { i32, i1 } result is the value read from memory; the i1
  ; success flag is not used by this test.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr %out, align 4
  ret void
}

; Test case: value-returning cmpxchg through a flat (generic) pointer using
; syncscope("one-as"), with success ordering `acq_rel` and failure ordering
; `seq_cst`. The exchange targets the i32 located four elements past %out
; (byte offset 16, which shows up in the expected assembly either as
; `offset:16` or as an explicit 64-bit add of 16), and the value the cmpxchg
; read from memory is then stored to %out.
; The per-target assertions that follow are autogenerated (see the note on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX7-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT: s_mov_b64 s[12:13], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, s4
; GFX7-NEXT: s_mov_b32 s7, s5
; GFX7-NEXT: s_mov_b32 s11, s12
; GFX7-NEXT: s_mov_b32 s10, s13
; GFX7-NEXT: s_add_u32 s6, s6, s11
; GFX7-NEXT: s_addc_u32 s10, s7, s10
; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT: s_mov_b32 s7, s10
; GFX7-NEXT: v_mov_b32_e32 v2, s9
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8
; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc
; GFX10-WGP-NEXT: s_mov_b64 s[12:13], 16
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_mov_b32 s6, s4
; GFX10-WGP-NEXT: s_mov_b32 s7, s5
; GFX10-WGP-NEXT: s_mov_b32 s11, s12
; GFX10-WGP-NEXT: s_mov_b32 s10, s13
; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11
; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10
; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX10-WGP-NEXT: s_mov_b32 s7, s10
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8
; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8
; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc
; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_mov_b32 s6, s4
; GFX10-CU-NEXT: s_mov_b32 s7, s5
; GFX10-CU-NEXT: s_mov_b32 s11, s12
; GFX10-CU-NEXT: s_mov_b32 s10, s13
; GFX10-CU-NEXT: s_add_u32 s6, s6, s11
; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10
; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX10-CU-NEXT: s_mov_b32 s7, s10
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s9
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8
; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: flat_store_dword v[0:1], v2
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5]
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3
; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9
; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7
; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6
; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8
; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc
; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6
; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc
; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8
; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2
; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0
; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX11-CU-NEXT: s_endpgm
;
; GFX12-WGP-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2
; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
    ptr %out, i32 %in, i32 %old) {
entry:
  ; Address of the i32 four elements past %out, i.e. %out + 16 bytes.
  %gep = getelementptr i32, ptr %out, i32 4
  ; Compare the memory value against %old and store %in on a match
  ; (success ordering acq_rel, failure ordering seq_cst).
  %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst
  ; Field 0 of the { i32, i1 } result is the value read from memory; the i1
  ; success flag is not used by this test.
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, ptr %out, align 4
  ret void
}

define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX7-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX7-NEXT: s_load_dword s9, s[6:7], 0x2
; GFX7-NEXT: s_load_dword s8, s[6:7], 0x3
; GFX7-NEXT: s_mov_b64 s[12:13], 16
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, s4
; GFX7-NEXT: s_mov_b32 s7, s5
; GFX7-NEXT: s_mov_b32 s11, s12
; GFX7-NEXT: s_mov_b32 s10, s13
; GFX7-NEXT: s_add_u32 s6, s6, s11
; GFX7-NEXT: s_addc_u32 s10, s7, s10
; GFX7-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
; GFX7-NEXT: s_mov_b32 s7, s10
; GFX7-NEXT: v_mov_b32_e32 v2, s9
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX7-NEXT: v_mov_b32_e32 v3, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_mov_b64 s[6:7], s[8:9]
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GFX10-WGP-NEXT: s_load_dword s9, s[6:7], 0x8
; GFX10-WGP-NEXT: s_load_dword s8, s[6:7], 0xc
; GFX10-WGP-NEXT: s_mov_b64 s[12:13],
16 22865; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 22866; GFX10-WGP-NEXT: s_mov_b32 s6, s4 22867; GFX10-WGP-NEXT: s_mov_b32 s7, s5 22868; GFX10-WGP-NEXT: s_mov_b32 s11, s12 22869; GFX10-WGP-NEXT: s_mov_b32 s10, s13 22870; GFX10-WGP-NEXT: s_add_u32 s6, s6, s11 22871; GFX10-WGP-NEXT: s_addc_u32 s10, s7, s10 22872; GFX10-WGP-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 22873; GFX10-WGP-NEXT: s_mov_b32 s7, s10 22874; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s9 22875; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s8 22876; GFX10-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 22877; GFX10-WGP-NEXT: v_mov_b32_e32 v3, v0 22878; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s6 22879; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s7 22880; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 22881; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 22882; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 22883; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 22884; GFX10-WGP-NEXT: buffer_gl1_inv 22885; GFX10-WGP-NEXT: buffer_gl0_inv 22886; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s4 22887; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 22888; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 22889; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 22890; GFX10-WGP-NEXT: s_endpgm 22891; 22892; GFX10-CU-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 22893; GFX10-CU: ; %bb.0: ; %entry 22894; GFX10-CU-NEXT: s_mov_b64 s[6:7], s[8:9] 22895; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 22896; GFX10-CU-NEXT: s_load_dword s9, s[6:7], 0x8 22897; GFX10-CU-NEXT: s_load_dword s8, s[6:7], 0xc 22898; GFX10-CU-NEXT: s_mov_b64 s[12:13], 16 22899; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 22900; GFX10-CU-NEXT: s_mov_b32 s6, s4 22901; GFX10-CU-NEXT: s_mov_b32 s7, s5 22902; GFX10-CU-NEXT: s_mov_b32 s11, s12 22903; GFX10-CU-NEXT: s_mov_b32 s10, s13 22904; GFX10-CU-NEXT: s_add_u32 s6, s6, s11 22905; GFX10-CU-NEXT: s_addc_u32 s10, s7, s10 22906; GFX10-CU-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 22907; GFX10-CU-NEXT: s_mov_b32 s7, s10 22908; 
GFX10-CU-NEXT: v_mov_b32_e32 v2, s9 22909; GFX10-CU-NEXT: v_mov_b32_e32 v0, s8 22910; GFX10-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 22911; GFX10-CU-NEXT: v_mov_b32_e32 v3, v0 22912; GFX10-CU-NEXT: v_mov_b32_e32 v0, s6 22913; GFX10-CU-NEXT: v_mov_b32_e32 v1, s7 22914; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 22915; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 22916; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 22917; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 22918; GFX10-CU-NEXT: buffer_gl1_inv 22919; GFX10-CU-NEXT: buffer_gl0_inv 22920; GFX10-CU-NEXT: v_mov_b32_e32 v0, s4 22921; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 22922; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 22923; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 22924; GFX10-CU-NEXT: s_endpgm 22925; 22926; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 22927; SKIP-CACHE-INV: ; %bb.0: ; %entry 22928; SKIP-CACHE-INV-NEXT: s_mov_b64 s[2:3], s[4:5] 22929; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 22930; SKIP-CACHE-INV-NEXT: s_load_dword s5, s[2:3], 0x2 22931; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[2:3], 0x3 22932; SKIP-CACHE-INV-NEXT: s_mov_b64 s[8:9], 16 22933; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 22934; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, s0 22935; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s1 22936; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s8 22937; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s9 22938; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, s7 22939; SKIP-CACHE-INV-NEXT: s_addc_u32 s6, s3, s6 22940; SKIP-CACHE-INV-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 22941; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, s6 22942; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s5 22943; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 22944; SKIP-CACHE-INV-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 22945; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, v0 22946; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 22947; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 22948; SKIP-CACHE-INV-NEXT: 
s_waitcnt vmcnt(0) 22949; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 22950; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 22951; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 22952; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 22953; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 22954; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 22955; SKIP-CACHE-INV-NEXT: s_endpgm 22956; 22957; GFX90A-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 22958; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry 22959; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 22960; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 22961; GFX90A-NOTTGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 22962; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 22963; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s7 22964; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s6 22965; GFX90A-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 22966; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 22967; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 22968; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 22969; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 22970; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 22971; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 22972; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 22973; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol 22974; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 22975; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 22976; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 22977; GFX90A-NOTTGSPLIT-NEXT: s_endpgm 22978; 22979; GFX90A-TGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 22980; GFX90A-TGSPLIT: ; %bb.0: ; %entry 22981; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 22982; GFX90A-TGSPLIT-NEXT: s_load_dword s7, s[8:9], 0x8 22983; GFX90A-TGSPLIT-NEXT: s_load_dword s6, s[8:9], 0xc 22984; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 22985; 
GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s7 22986; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s6 22987; GFX90A-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 22988; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 22989; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 22990; GFX90A-TGSPLIT-NEXT: buffer_wbl2 22991; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 22992; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc 22993; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 22994; GFX90A-TGSPLIT-NEXT: buffer_invl2 22995; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol 22996; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1] 22997; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 22998; GFX90A-TGSPLIT-NEXT: s_endpgm 22999; 23000; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 23001; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry 23002; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 23003; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 23004; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 23005; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 23006; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 23007; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 23008; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 23009; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 23010; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 23011; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 23012; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 23013; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 23014; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) 23015; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 23016; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 23017; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 23018; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 23019; GFX940-NOTTGSPLIT-NEXT: s_endpgm 23020; 23021; 
GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 23022; GFX940-TGSPLIT: ; %bb.0: ; %entry 23023; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 23024; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 23025; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc 23026; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) 23027; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 23028; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 23029; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 23030; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 23031; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 23032; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 23033; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 23034; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 23035; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) 23036; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 23037; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 23038; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 23039; GFX940-TGSPLIT-NEXT: s_endpgm 23040; 23041; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 23042; GFX11-WGP: ; %bb.0: ; %entry 23043; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 23044; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 23045; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 23046; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 23047; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s3 23048; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 23049; GFX11-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 23050; GFX11-WGP-NEXT: v_mov_b32_e32 v3, v0 23051; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0 23052; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 23053; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 23054; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 23055; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 23056; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 23057; GFX11-WGP-NEXT: buffer_gl1_inv 23058; GFX11-WGP-NEXT: buffer_gl0_inv 23059; GFX11-WGP-NEXT: 
v_mov_b32_e32 v0, s0 23060; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 23061; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) 23062; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 23063; GFX11-WGP-NEXT: s_endpgm 23064; 23065; GFX11-CU-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 23066; GFX11-CU: ; %bb.0: ; %entry 23067; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 23068; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 23069; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 23070; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 23071; GFX11-CU-NEXT: v_mov_b32_e32 v2, s3 23072; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 23073; GFX11-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 23074; GFX11-CU-NEXT: v_mov_b32_e32 v3, v0 23075; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 23076; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 23077; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 23078; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 23079; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc 23080; GFX11-CU-NEXT: s_waitcnt vmcnt(0) 23081; GFX11-CU-NEXT: buffer_gl1_inv 23082; GFX11-CU-NEXT: buffer_gl0_inv 23083; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0 23084; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 23085; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) 23086; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 23087; GFX11-CU-NEXT: s_endpgm 23088; 23089; GFX12-WGP-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 23090; GFX12-WGP: ; %bb.0: ; %entry 23091; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 23092; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x8 23093; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0xc 23094; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 23095; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s3 23096; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2 23097; GFX12-WGP-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 23098; GFX12-WGP-NEXT: v_mov_b32_e32 v3, v0 23099; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 23100; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 23101; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS 23102; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 23103; 
GFX12-WGP-NEXT: s_wait_samplecnt 0x0 23104; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 23105; GFX12-WGP-NEXT: s_wait_storecnt 0x0 23106; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 23107; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 23108; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 23109; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 23110; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS 23111; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 23112; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 23113; GFX12-WGP-NEXT: s_wait_dscnt 0x0 23114; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 23115; GFX12-WGP-NEXT: s_endpgm 23116; 23117; GFX12-CU-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 23118; GFX12-CU: ; %bb.0: ; %entry 23119; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 23120; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x8 23121; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0xc 23122; GFX12-CU-NEXT: s_wait_kmcnt 0x0 23123; GFX12-CU-NEXT: v_mov_b32_e32 v2, s3 23124; GFX12-CU-NEXT: v_mov_b32_e32 v0, s2 23125; GFX12-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 23126; GFX12-CU-NEXT: v_mov_b32_e32 v3, v0 23127; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 23128; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 23129; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS 23130; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 23131; GFX12-CU-NEXT: s_wait_samplecnt 0x0 23132; GFX12-CU-NEXT: s_wait_loadcnt 0x0 23133; GFX12-CU-NEXT: s_wait_storecnt 0x0 23134; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 23135; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 23136; GFX12-CU-NEXT: s_wait_samplecnt 0x0 23137; GFX12-CU-NEXT: s_wait_loadcnt 0x0 23138; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS 23139; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 23140; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 23141; GFX12-CU-NEXT: s_wait_dscnt 0x0 23142; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 23143; GFX12-CU-NEXT: s_endpgm 23144 ptr %out, i32 %in, i32 %old) { 23145entry: 23146 %gep = 
getelementptr i32, ptr %out, i32 4 23147 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst 23148 %val0 = extractvalue { i32, i1 } %val, 0 23149 store i32 %val0, ptr %out, align 4 23150 ret void 23151} 23152