; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -passes=atomic-expand < %s | FileCheck -check-prefix=IR %s
; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefix=GCN %s

; Atomic operations on addrspace(5) pointers (called "private" in the test
; names below). The opt invocation checks that atomic-expand rewrites each
; atomic operation into an ordinary load / compute / store sequence; the llc
; invocation checks the tahiti machine code produced for the same input.

; seq_cst atomic load of i32: expected to become a plain 4-byte-aligned load.
define i32 @load_atomic_private_seq_cst_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @load_atomic_private_seq_cst_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
; IR-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[LOAD]]
;
; GCN-LABEL: load_atomic_private_seq_cst_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i32, ptr addrspace(5) %ptr seq_cst, align 4
  ret i32 %load
}

; seq_cst atomic load of i64: expected to become a plain 8-byte-aligned load,
; which the machine code checks show as two dword loads.
define i64 @load_atomic_private_seq_cst_i64(ptr addrspace(5) %ptr) {
; IR-LABEL: define i64 @load_atomic_private_seq_cst_i64(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[LOAD:%.*]] = load i64, ptr addrspace(5) [[PTR]], align 8
; IR-NEXT:    ret i64 [[LOAD]]
;
; GCN-LABEL: load_atomic_private_seq_cst_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
; GCN-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
; GCN-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i64, ptr addrspace(5) %ptr seq_cst, align 8
  ret i64 %load
}

; seq_cst atomic store of i32: expected to become a plain store.
define void @atomic_store_seq_cst_i32(ptr addrspace(5) %ptr, i32 %val) {
; IR-LABEL: define void @atomic_store_seq_cst_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; IR-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret void
;
; GCN-LABEL: atomic_store_seq_cst_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  store atomic i32 %val, ptr addrspace(5) %ptr seq_cst, align 4
  ret void
}

; seq_cst atomic store of i64: expected to become a plain store, which the
; machine code checks show as two dword stores.
define void @atomic_store_seq_cst_i64(ptr addrspace(5) %ptr, i64 %val) {
; IR-LABEL: define void @atomic_store_seq_cst_i64(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]], i64 [[VAL:%.*]]) #[[ATTR0]] {
; IR-NEXT:    store i64 [[VAL]], ptr addrspace(5) [[PTR]], align 8
; IR-NEXT:    ret void
;
; GCN-LABEL: atomic_store_seq_cst_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_i32_e32 v3, vcc, 4, v0
; GCN-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
; GCN-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  store atomic i64 %val, ptr addrspace(5) %ptr seq_cst, align 8
  ret void
}

; Same as the i32 load test, but with syncscope("agent") on the atomic load;
; the expected output is identical.
define i32 @load_atomic_private_seq_cst_syncscope_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @load_atomic_private_seq_cst_syncscope_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[LOAD]]
;
; GCN-LABEL: load_atomic_private_seq_cst_syncscope_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i32, ptr addrspace(5) %ptr syncscope("agent") seq_cst, align 4
  ret i32 %load
}

; Same as the i32 store test, but with syncscope("agent") on the atomic store;
; the expected output is identical.
define void @atomic_store_seq_cst_syncscope_i32(ptr addrspace(5) %ptr, i32 %val) {
; IR-LABEL: define void @atomic_store_seq_cst_syncscope_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; IR-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret void
;
; GCN-LABEL: atomic_store_seq_cst_syncscope_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  store atomic i32 %val, ptr addrspace(5) %ptr syncscope("agent") seq_cst, align 4
  ret void
}

; cmpxchg (expected 0, new 1) on i32: expected to become load, compare,
; select and store, with the { i32, i1 } result rebuilt via insertvalue.
; Both result fields are used so neither half is dead.
define i32 @cmpxchg_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @cmpxchg_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; IR-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 1, i32 [[TMP1]]
; IR-NEXT:    store i32 [[TMP3]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[TMP4:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP1]], 0
; IR-NEXT:    [[TMP5:%.*]] = insertvalue { i32, i1 } [[TMP4]], i1 [[TMP2]], 1
; IR-NEXT:    [[RESULT_0:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; IR-NEXT:    [[RESULT_1:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; IR-NEXT:    store i1 [[RESULT_1]], ptr addrspace(1) poison, align 1
; IR-NEXT:    ret i32 [[RESULT_0]]
;
; GCN-LABEL: cmpxchg_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
; GCN-NEXT:    v_cndmask_b32_e64 v2, v1, 1, vcc
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GCN-NEXT:    buffer_store_byte v0, off, s[4:7], 0
; GCN-NEXT:    s_waitcnt expcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = cmpxchg ptr addrspace(5) %ptr, i32 0, i32 1 acq_rel monotonic
  %result.0 = extractvalue { i32, i1 } %result, 0
  %result.1 = extractvalue { i32, i1 } %result, 1
  store i1 %result.1, ptr addrspace(1) poison
  ret i32 %result.0
}

; i64 variant of the cmpxchg test above.
define i64 @cmpxchg_private_i64(ptr addrspace(5) %ptr) {
; IR-LABEL: define i64 @cmpxchg_private_i64(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(5) [[PTR]], align 8
; IR-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
; IR-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i64 1, i64 [[TMP1]]
; IR-NEXT:    store i64 [[TMP3]], ptr addrspace(5) [[PTR]], align 8
; IR-NEXT:    [[TMP4:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP1]], 0
; IR-NEXT:    [[TMP5:%.*]] = insertvalue { i64, i1 } [[TMP4]], i1 [[TMP2]], 1
; IR-NEXT:    [[RESULT_0:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; IR-NEXT:    [[RESULT_1:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; IR-NEXT:    store i1 [[RESULT_1]], ptr addrspace(1) poison, align 1
; IR-NEXT:    ret i64 [[RESULT_0]]
;
; GCN-LABEL: cmpxchg_private_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v2, v0
; GCN-NEXT:    v_add_i32_e32 v3, vcc, 4, v2
; GCN-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
; GCN-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; GCN-NEXT:    v_cndmask_b32_e64 v4, v1, 0, vcc
; GCN-NEXT:    buffer_store_dword v4, v3, s[0:3], 0 offen
; GCN-NEXT:    v_cndmask_b32_e64 v3, v0, 1, vcc
; GCN-NEXT:    s_waitcnt expcnt(0)
; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
; GCN-NEXT:    buffer_store_dword v3, v2, s[0:3], 0 offen
; GCN-NEXT:    buffer_store_byte v4, off, s[4:7], 0
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = cmpxchg ptr addrspace(5) %ptr, i64 0, i64 1 acq_rel monotonic
  %result.0 = extractvalue { i64, i1 } %result, 0
  %result.1 = extractvalue { i64, i1 } %result, 1
  store i1 %result.1, ptr addrspace(1) poison
  ret i64 %result.0
}

; atomicrmw xchg with constant 4: expected to become a load of the old value
; followed by a store of 4.
define i32 @atomicrmw_xchg_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @atomicrmw_xchg_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    store i32 4, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_xchg_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v2, 4
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(1)
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw xchg ptr addrspace(5) %ptr, i32 4 seq_cst
  ret i32 %result
}

; atomicrmw add 4: expected load, add, store; the old value is returned.
define i32 @atomicrmw_add_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @atomicrmw_add_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[NEW:%.*]] = add i32 [[TMP1]], 4
; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_add_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_add_i32_e32 v2, vcc, 4, v1
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw add ptr addrspace(5) %ptr, i32 4 seq_cst
  ret i32 %result
}

; atomicrmw sub 4: expected load, sub, store. The machine code checks show the
; subtraction emitted as an add of -4.
define i32 @atomicrmw_sub_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @atomicrmw_sub_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[NEW:%.*]] = sub i32 [[TMP1]], 4
; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_sub_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_add_i32_e32 v2, vcc, -4, v1
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw sub ptr addrspace(5) %ptr, i32 4 seq_cst
  ret i32 %result
}

; atomicrmw and 4: expected load, and, store.
define i32 @atomicrmw_and_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @atomicrmw_and_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[NEW:%.*]] = and i32 [[TMP1]], 4
; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_and_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v2, 4, v1
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw and ptr addrspace(5) %ptr, i32 4 seq_cst
  ret i32 %result
}

; atomicrmw nand 4: expected load, and, xor -1, store. The machine code checks
; show this emitted as a not followed by an or with -5.
define i32 @atomicrmw_nand_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @atomicrmw_nand_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 4
; IR-NEXT:    [[NEW:%.*]] = xor i32 [[TMP2]], -1
; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_nand_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_not_b32_e32 v2, v1
; GCN-NEXT:    v_or_b32_e32 v2, -5, v2
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw nand ptr addrspace(5) %ptr, i32 4 seq_cst
  ret i32 %result
}

; atomicrmw or 4: expected load, or, store.
define i32 @atomicrmw_or_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @atomicrmw_or_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[NEW:%.*]] = or i32 [[TMP1]], 4
; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_or_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_or_b32_e32 v2, 4, v1
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw or ptr addrspace(5) %ptr, i32 4 seq_cst
  ret i32 %result
}

; atomicrmw xor 4: expected load, xor, store.
define i32 @atomicrmw_xor_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @atomicrmw_xor_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[NEW:%.*]] = xor i32 [[TMP1]], 4
; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_xor_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_xor_b32_e32 v2, 4, v1
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw xor ptr addrspace(5) %ptr, i32 4 seq_cst
  ret i32 %result
}

; atomicrmw max 4 (signed): expected load, icmp sgt, select, store.
define i32 @atomicrmw_max_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @atomicrmw_max_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], 4
; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_max_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_max_i32_e32 v2, 4, v1
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw max ptr addrspace(5) %ptr, i32 4 seq_cst
  ret i32 %result
}

; atomicrmw min 4 (signed): expected load, icmp sle, select, store.
define i32 @atomicrmw_min_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @atomicrmw_min_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[TMP2:%.*]] = icmp sle i32 [[TMP1]], 4
; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_min_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_min_i32_e32 v2, 4, v1
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw min ptr addrspace(5) %ptr, i32 4 seq_cst
  ret i32 %result
}

; atomicrmw umax 4 (unsigned): expected load, icmp ugt, select, store.
define i32 @atomicrmw_umax_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @atomicrmw_umax_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 4
; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_umax_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_max_u32_e32 v2, 4, v1
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw umax ptr addrspace(5) %ptr, i32 4 seq_cst
  ret i32 %result
}

; atomicrmw umin 4 (unsigned): expected load, icmp ule, select, store.
define i32 @atomicrmw_umin_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @atomicrmw_umin_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[TMP1]], 4
; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_umin_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_min_u32_e32 v2, 4, v1
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw umin ptr addrspace(5) %ptr, i32 4 seq_cst
  ret i32 %result
}

; atomicrmw fadd 2.0 on float: expected load, fadd, store.
define float @atomicrmw_fadd_private_f32(ptr addrspace(5) %ptr) {
; IR-LABEL: define float @atomicrmw_fadd_private_f32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[NEW:%.*]] = fadd float [[TMP1]], 2.000000e+00
; IR-NEXT:    store float [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret float [[TMP1]]
;
; GCN-LABEL: atomicrmw_fadd_private_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v2, 2.0, v1
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw fadd ptr addrspace(5) %ptr, float 2.0 seq_cst
  ret float %result
}

; atomicrmw fadd 2.0 on bfloat: expected 2-byte-aligned load, fadd, store. The
; machine code checks show the value shifted left by 16 before a 32-bit float
; add and the sum shifted right by 16 before the short store.
define bfloat @atomicrmw_fadd_private_bf16(ptr addrspace(5) %ptr) {
; IR-LABEL: define bfloat @atomicrmw_fadd_private_bf16(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load bfloat, ptr addrspace(5) [[PTR]], align 2
; IR-NEXT:    [[NEW:%.*]] = fadd bfloat [[TMP1]], 0xR4000
; IR-NEXT:    store bfloat [[NEW]], ptr addrspace(5) [[PTR]], align 2
; IR-NEXT:    ret bfloat [[TMP1]]
;
; GCN-LABEL: atomicrmw_fadd_private_bf16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_ushort v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GCN-NEXT:    v_add_f32_e32 v2, 2.0, v1
; GCN-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
; GCN-NEXT:    buffer_store_short v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw fadd ptr addrspace(5) %ptr, bfloat 2.0 seq_cst
  ret bfloat %result
}

; atomicrmw fsub with a variable float operand: expected load, fsub, store.
; The function name carries an _i32 suffix although the operation is on float.
define float @atomicrmw_fsub_private_i32(ptr addrspace(5) %ptr, float %val) {
; IR-LABEL: define float @atomicrmw_fsub_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[NEW:%.*]] = fsub float [[TMP1]], [[VAL]]
; IR-NEXT:    store float [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret float [[TMP1]]
;
; GCN-LABEL: atomicrmw_fsub_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v1, v2, v1
; GCN-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v2
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw fsub ptr addrspace(5) %ptr, float %val seq_cst
  ret float %result
}

; Kernel: atomicrmw add on a dynamically indexed element of a two-element
; alloca initialised to { 0, 1 }. The machine code checks contain no access to
; the alloca; the stored result is a select on whether %in equals 1.
define amdgpu_kernel void @alloca_promote_atomicrmw_private_lds_promote(ptr addrspace(1) %out, i32 %in) nounwind {
; IR-LABEL: define amdgpu_kernel void @alloca_promote_atomicrmw_private_lds_promote(
; IR-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[IN:%.*]]) #[[ATTR1:[0-9]+]] {
; IR-NEXT:  entry:
; IR-NEXT:    [[TMP:%.*]] = alloca [2 x i32], align 4, addrspace(5)
; IR-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 1
; IR-NEXT:    store i32 0, ptr addrspace(5) [[TMP]], align 4
; IR-NEXT:    store i32 1, ptr addrspace(5) [[GEP2]], align 4
; IR-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 [[IN]]
; IR-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[GEP3]], align 4
; IR-NEXT:    [[NEW:%.*]] = add i32 [[TMP0]], 7
; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[GEP3]], align 4
; IR-NEXT:    store i32 [[TMP0]], ptr addrspace(1) [[OUT]], align 4
; IR-NEXT:    ret void
;
; GCN-LABEL: alloca_promote_atomicrmw_private_lds_promote:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s6, s[4:5], 0xb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_eq_u32 s6, 1
; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
entry:
  %tmp = alloca [2 x i32], addrspace(5)
  %gep2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 1
  store i32 0, ptr addrspace(5) %tmp
  store i32 1, ptr addrspace(5) %gep2
  %gep3 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 %in
  %rmw = atomicrmw add ptr addrspace(5) %gep3, i32 7 acq_rel
  store i32 %rmw, ptr addrspace(1) %out
  ret void
}

; Kernel: same alloca setup as the previous test, but with a cmpxchg
; (expected 0, new 1) whose loaded value is stored to %out.
define amdgpu_kernel void @alloca_promote_cmpxchg_private(ptr addrspace(1) %out, i32 %in) nounwind {
; IR-LABEL: define amdgpu_kernel void @alloca_promote_cmpxchg_private(
; IR-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[IN:%.*]]) #[[ATTR1]] {
; IR-NEXT:  entry:
; IR-NEXT:    [[TMP:%.*]] = alloca [2 x i32], align 4, addrspace(5)
; IR-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 1
; IR-NEXT:    store i32 0, ptr addrspace(5) [[TMP]], align 4
; IR-NEXT:    store i32 1, ptr addrspace(5) [[GEP2]], align 4
; IR-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 [[IN]]
; IR-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[GEP3]], align 4
; IR-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; IR-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 1, i32 [[TMP0]]
; IR-NEXT:    store i32 [[TMP2]], ptr addrspace(5) [[GEP3]], align 4
; IR-NEXT:    [[TMP3:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP0]], 0
; IR-NEXT:    [[TMP4:%.*]] = insertvalue { i32, i1 } [[TMP3]], i1 [[TMP1]], 1
; IR-NEXT:    [[VAL:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; IR-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
; IR-NEXT:    ret void
;
; GCN-LABEL: alloca_promote_cmpxchg_private:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dword s6, s[4:5], 0xb
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_eq_u32 s6, 1
; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
entry:
  %tmp = alloca [2 x i32], addrspace(5)
  %gep2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 1
  store i32 0, ptr addrspace(5) %tmp
  store i32 1, ptr addrspace(5) %gep2
  %gep3 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 %in
  %xchg = cmpxchg ptr addrspace(5) %gep3, i32 0, i32 1 acq_rel monotonic
  %val = extractvalue { i32, i1 } %xchg, 0
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; atomicrmw uinc_wrap with limit 4: expected to store 0 when the old value is
; unsigned >= 4, otherwise old + 1.
define i32 @atomicrmw_inc_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @atomicrmw_inc_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
; IR-NEXT:    [[TMP3:%.*]] = icmp uge i32 [[TMP1]], 4
; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_inc_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_add_i32_e32 v2, vcc, 1, v1
; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 4, v1
; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw uinc_wrap ptr addrspace(5) %ptr, i32 4 seq_cst
  ret i32 %result
}

; atomicrmw udec_wrap with limit 4: expected to store 4 when the old value is
; 0 or unsigned > 4, otherwise old - 1.
define i32 @atomicrmw_dec_private_i32(ptr addrspace(5) %ptr) {
; IR-LABEL: define i32 @atomicrmw_dec_private_i32(
; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], 1
; IR-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
; IR-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[TMP1]], 4
; IR-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i32 4, i32 [[TMP2]]
; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT:    ret i32 [[TMP1]]
;
; GCN-LABEL: atomicrmw_dec_private_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_add_i32_e32 v2, vcc, -1, v1
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
; GCN-NEXT:    v_cmp_lt_u32_e64 s[4:5], 4, v1
; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, 4, s[4:5]
; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw udec_wrap ptr addrspace(5) %ptr, i32 4 seq_cst
  ret i32 %result
}