xref: /llvm-project/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll (revision ef067f52044042fbe1b6fa21a90bfdbcf1622b02)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
; Atomic NAND of the constant 4 on the i32 behind an addrspace(3) pointer,
; with seq_cst ordering; the atomicrmw result is returned to the caller.
; The expected output below contains no single NAND instruction. It is a
; compare-and-swap retry loop: ds_read_b32 fetches the first value, then
; ds_cmpst_rtn_b32 is retried until the value it returns equals the value
; that was expected.
; The new value is formed by v_not_b32 followed by v_or_b32 with -5, since
; ~(old & 4) == ~old | ~4 == ~old | -5.
; Loop exit: the compare result is OR-ed into s[4:5], those bits are cleared
; from exec, and the loop branches back while exec is non-zero; exec is
; OR-ed with s[4:5] again after the loop.
4define i32 @atomic_nand_i32_lds(ptr addrspace(3) %ptr) nounwind {
5; GCN-LABEL: atomic_nand_i32_lds:
6; GCN:       ; %bb.0:
7; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8; GCN-NEXT:    ds_read_b32 v1, v0
9; GCN-NEXT:    s_mov_b64 s[4:5], 0
10; GCN-NEXT:  .LBB0_1: ; %atomicrmw.start
11; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
12; GCN-NEXT:    s_waitcnt lgkmcnt(0)
13; GCN-NEXT:    v_mov_b32_e32 v2, v1
14; GCN-NEXT:    v_not_b32_e32 v1, v2
15; GCN-NEXT:    v_or_b32_e32 v1, -5, v1
16; GCN-NEXT:    ds_cmpst_rtn_b32 v1, v0, v2, v1
17; GCN-NEXT:    s_waitcnt lgkmcnt(0)
18; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
19; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
20; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
21; GCN-NEXT:    s_cbranch_execnz .LBB0_1
22; GCN-NEXT:  ; %bb.2: ; %atomicrmw.end
23; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
24; GCN-NEXT:    v_mov_b32_e32 v0, v1
25; GCN-NEXT:    s_setpc_b64 s[30:31]
26  %result = atomicrmw nand ptr addrspace(3) %ptr, i32 4 seq_cst
27  ret i32 %result
28}
29
; Atomic NAND of the constant 4 on the i32 behind an addrspace(1) pointer,
; with seq_cst ordering; the atomicrmw result is returned to the caller.
; The expected output below is a compare-and-swap retry loop:
; global_load_dword fetches the first value, then global_atomic_cmpswap
; (with glc) is retried until the value it returns equals the expected one.
; The new value is formed by v_not_b32 followed by v_or_b32 with -5, since
; ~(old & 4) == ~old | ~4 == ~old | -5.
; Each cmpswap is followed by s_waitcnt vmcnt(0) and buffer_wbinvl1_vol.
; NOTE(review): presumably that pair implements the acquire half of seq_cst;
; confirm against the AMDGPU memory model documentation.
30define i32 @atomic_nand_i32_global(ptr addrspace(1) %ptr) nounwind {
31; GCN-LABEL: atomic_nand_i32_global:
32; GCN:       ; %bb.0:
33; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GCN-NEXT:    global_load_dword v2, v[0:1], off
35; GCN-NEXT:    s_mov_b64 s[4:5], 0
36; GCN-NEXT:  .LBB1_1: ; %atomicrmw.start
37; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
38; GCN-NEXT:    s_waitcnt vmcnt(0)
39; GCN-NEXT:    v_mov_b32_e32 v3, v2
40; GCN-NEXT:    v_not_b32_e32 v2, v3
41; GCN-NEXT:    v_or_b32_e32 v2, -5, v2
42; GCN-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
43; GCN-NEXT:    s_waitcnt vmcnt(0)
44; GCN-NEXT:    buffer_wbinvl1_vol
45; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
46; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
47; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
48; GCN-NEXT:    s_cbranch_execnz .LBB1_1
49; GCN-NEXT:  ; %bb.2: ; %atomicrmw.end
50; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
51; GCN-NEXT:    v_mov_b32_e32 v0, v2
52; GCN-NEXT:    s_setpc_b64 s[30:31]
53  %result = atomicrmw nand ptr addrspace(1) %ptr, i32 4 seq_cst
54  ret i32 %result
55}
56
; Atomic NAND of the constant 4 on the i32 behind a pointer with no explicit
; address space, with seq_cst ordering; the atomicrmw result is returned.
; The expected output below is a compare-and-swap retry loop:
; flat_load_dword fetches the first value, then flat_atomic_cmpswap (with
; glc) is retried until the value it returns equals the expected one.
; The new value is formed by v_not_b32 followed by v_or_b32 with -5, since
; ~(old & 4) == ~old | ~4 == ~old | -5.
; Unlike the addrspace(1) and addrspace(3) variants in this file, the waits
; around the flat accesses name both vmcnt(0) and lgkmcnt(0).
; NOTE(review): presumably because a flat access may use either counter;
; confirm against the AMDGPU backend documentation.
57define i32 @atomic_nand_i32_flat(ptr %ptr) nounwind {
58; GCN-LABEL: atomic_nand_i32_flat:
59; GCN:       ; %bb.0:
60; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61; GCN-NEXT:    flat_load_dword v2, v[0:1]
62; GCN-NEXT:    s_mov_b64 s[4:5], 0
63; GCN-NEXT:  .LBB2_1: ; %atomicrmw.start
64; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
65; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
66; GCN-NEXT:    v_mov_b32_e32 v3, v2
67; GCN-NEXT:    v_not_b32_e32 v2, v3
68; GCN-NEXT:    v_or_b32_e32 v2, -5, v2
69; GCN-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
70; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
71; GCN-NEXT:    buffer_wbinvl1_vol
72; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
73; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
74; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
75; GCN-NEXT:    s_cbranch_execnz .LBB2_1
76; GCN-NEXT:  ; %bb.2: ; %atomicrmw.end
77; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
78; GCN-NEXT:    v_mov_b32_e32 v0, v2
79; GCN-NEXT:    s_setpc_b64 s[30:31]
80  %result = atomicrmw nand ptr %ptr, i32 4 seq_cst
81  ret i32 %result
82}
83