xref: /llvm-project/llvm/test/CodeGen/NVPTX/cmpxchg.ll (revision 892a804d93d44ddfd7cd351852fe6aef32d4dcd0)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march nvptx64 --version 5
2; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_32 | FileCheck %s --check-prefixes=SM30,CHECK
3; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_32 | %ptxas-verify %}
4; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefixes=SM70,CHECK
5; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
6
7; TODO: these are system scope, but are compiled to gpu scope..
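8; TODO: seq_cst is compiled to relaxed; note the cmpxchg tests below use monotonic ordering, so seq_cst is not exercised here (confirm which ordering was intended).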
9
; relaxed_sys_i8: cmpxchg on an i8 with monotonic success and failure ordering.
; The function returns %new; the cmpxchg result (%pairold) is not used.
; Both RUN configurations expect the same PTX: the access is widened to the
; enclosing 4-byte-aligned word (address masked with -4, byte offset turned
; into a bit shift, lane mask 255) and performed as an atom.cas.b32 retry loop
; that repeats only while the bytes outside the target lane have changed.
10; CHECK-LABEL: relaxed_sys_i8
11define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
12; SM30-LABEL: relaxed_sys_i8(
13; SM30:       {
14; SM30-NEXT:    .reg .pred %p<3>;
15; SM30-NEXT:    .reg .b16 %rs<2>;
16; SM30-NEXT:    .reg .b32 %r<21>;
17; SM30-NEXT:    .reg .b64 %rd<3>;
18; SM30-EMPTY:
19; SM30-NEXT:  // %bb.0:
20; SM30-NEXT:    ld.param.u8 %rs1, [relaxed_sys_i8_param_2];
21; SM30-NEXT:    ld.param.u64 %rd2, [relaxed_sys_i8_param_0];
22; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
23; SM30-NEXT:    cvt.u32.u64 %r9, %rd2;
24; SM30-NEXT:    and.b32 %r10, %r9, 3;
25; SM30-NEXT:    shl.b32 %r1, %r10, 3;
26; SM30-NEXT:    mov.b32 %r11, 255;
27; SM30-NEXT:    shl.b32 %r12, %r11, %r1;
28; SM30-NEXT:    not.b32 %r2, %r12;
29; SM30-NEXT:    cvt.u32.u16 %r13, %rs1;
30; SM30-NEXT:    and.b32 %r14, %r13, 255;
31; SM30-NEXT:    shl.b32 %r3, %r14, %r1;
32; SM30-NEXT:    ld.param.u8 %r15, [relaxed_sys_i8_param_1];
33; SM30-NEXT:    shl.b32 %r4, %r15, %r1;
34; SM30-NEXT:    ld.u32 %r16, [%rd1];
35; SM30-NEXT:    and.b32 %r20, %r16, %r2;
36; SM30-NEXT:  $L__BB0_1: // %partword.cmpxchg.loop
37; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
38; SM30-NEXT:    or.b32 %r17, %r20, %r3;
39; SM30-NEXT:    or.b32 %r18, %r20, %r4;
40; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
41; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r18;
42; SM30-NEXT:    @%p1 bra $L__BB0_3;
43; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
44; SM30-NEXT:    // in Loop: Header=BB0_1 Depth=1
45; SM30-NEXT:    and.b32 %r8, %r7, %r2;
46; SM30-NEXT:    setp.ne.s32 %p2, %r20, %r8;
47; SM30-NEXT:    mov.u32 %r20, %r8;
48; SM30-NEXT:    @%p2 bra $L__BB0_1;
49; SM30-NEXT:  $L__BB0_3: // %partword.cmpxchg.end
50; SM30-NEXT:    st.param.b32 [func_retval0], %r13;
51; SM30-NEXT:    ret;
52;
53; SM70-LABEL: relaxed_sys_i8(
54; SM70:       {
55; SM70-NEXT:    .reg .pred %p<3>;
56; SM70-NEXT:    .reg .b16 %rs<2>;
57; SM70-NEXT:    .reg .b32 %r<21>;
58; SM70-NEXT:    .reg .b64 %rd<3>;
59; SM70-EMPTY:
60; SM70-NEXT:  // %bb.0:
61; SM70-NEXT:    ld.param.u8 %rs1, [relaxed_sys_i8_param_2];
62; SM70-NEXT:    ld.param.u64 %rd2, [relaxed_sys_i8_param_0];
63; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
64; SM70-NEXT:    cvt.u32.u64 %r9, %rd2;
65; SM70-NEXT:    and.b32 %r10, %r9, 3;
66; SM70-NEXT:    shl.b32 %r1, %r10, 3;
67; SM70-NEXT:    mov.b32 %r11, 255;
68; SM70-NEXT:    shl.b32 %r12, %r11, %r1;
69; SM70-NEXT:    not.b32 %r2, %r12;
70; SM70-NEXT:    cvt.u32.u16 %r13, %rs1;
71; SM70-NEXT:    and.b32 %r14, %r13, 255;
72; SM70-NEXT:    shl.b32 %r3, %r14, %r1;
73; SM70-NEXT:    ld.param.u8 %r15, [relaxed_sys_i8_param_1];
74; SM70-NEXT:    shl.b32 %r4, %r15, %r1;
75; SM70-NEXT:    ld.u32 %r16, [%rd1];
76; SM70-NEXT:    and.b32 %r20, %r16, %r2;
77; SM70-NEXT:  $L__BB0_1: // %partword.cmpxchg.loop
78; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
79; SM70-NEXT:    or.b32 %r17, %r20, %r3;
80; SM70-NEXT:    or.b32 %r18, %r20, %r4;
81; SM70-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
82; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
83; SM70-NEXT:    @%p1 bra $L__BB0_3;
84; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
85; SM70-NEXT:    // in Loop: Header=BB0_1 Depth=1
86; SM70-NEXT:    and.b32 %r8, %r7, %r2;
87; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
88; SM70-NEXT:    mov.u32 %r20, %r8;
89; SM70-NEXT:    @%p2 bra $L__BB0_1;
90; SM70-NEXT:  $L__BB0_3: // %partword.cmpxchg.end
91; SM70-NEXT:    st.param.b32 [func_retval0], %r13;
92; SM70-NEXT:    ret;
93  %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new monotonic monotonic
94  ret i8 %new
95}
96
; relaxed_sys_i16: cmpxchg on an i16 with monotonic success and failure
; ordering. The function returns %new; the cmpxchg result (%pairold) is not
; used. Both RUN configurations expect the same PTX: the access is widened to
; the enclosing 4-byte-aligned word (address masked with -4, lane mask 65535
; shifted by the byte offset times 8) and performed as an atom.cas.b32 retry
; loop that repeats only while the bits outside the target lane have changed.
97; CHECK-LABEL: relaxed_sys_i16
98define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
99; SM30-LABEL: relaxed_sys_i16(
100; SM30:       {
101; SM30-NEXT:    .reg .pred %p<3>;
102; SM30-NEXT:    .reg .b16 %rs<2>;
103; SM30-NEXT:    .reg .b32 %r<20>;
104; SM30-NEXT:    .reg .b64 %rd<3>;
105; SM30-EMPTY:
106; SM30-NEXT:  // %bb.0:
107; SM30-NEXT:    ld.param.u16 %rs1, [relaxed_sys_i16_param_2];
108; SM30-NEXT:    ld.param.u64 %rd2, [relaxed_sys_i16_param_0];
109; SM30-NEXT:    and.b64 %rd1, %rd2, -4;
110; SM30-NEXT:    ld.param.u16 %r9, [relaxed_sys_i16_param_1];
111; SM30-NEXT:    cvt.u32.u64 %r10, %rd2;
112; SM30-NEXT:    and.b32 %r11, %r10, 3;
113; SM30-NEXT:    shl.b32 %r1, %r11, 3;
114; SM30-NEXT:    mov.b32 %r12, 65535;
115; SM30-NEXT:    shl.b32 %r13, %r12, %r1;
116; SM30-NEXT:    not.b32 %r2, %r13;
117; SM30-NEXT:    cvt.u32.u16 %r14, %rs1;
118; SM30-NEXT:    shl.b32 %r3, %r14, %r1;
119; SM30-NEXT:    shl.b32 %r4, %r9, %r1;
120; SM30-NEXT:    ld.u32 %r15, [%rd1];
121; SM30-NEXT:    and.b32 %r19, %r15, %r2;
122; SM30-NEXT:  $L__BB1_1: // %partword.cmpxchg.loop
123; SM30-NEXT:    // =>This Inner Loop Header: Depth=1
124; SM30-NEXT:    or.b32 %r16, %r19, %r3;
125; SM30-NEXT:    or.b32 %r17, %r19, %r4;
126; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
127; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r17;
128; SM30-NEXT:    @%p1 bra $L__BB1_3;
129; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
130; SM30-NEXT:    // in Loop: Header=BB1_1 Depth=1
131; SM30-NEXT:    and.b32 %r8, %r7, %r2;
132; SM30-NEXT:    setp.ne.s32 %p2, %r19, %r8;
133; SM30-NEXT:    mov.u32 %r19, %r8;
134; SM30-NEXT:    @%p2 bra $L__BB1_1;
135; SM30-NEXT:  $L__BB1_3: // %partword.cmpxchg.end
136; SM30-NEXT:    st.param.b32 [func_retval0], %r14;
137; SM30-NEXT:    ret;
138;
139; SM70-LABEL: relaxed_sys_i16(
140; SM70:       {
141; SM70-NEXT:    .reg .pred %p<3>;
142; SM70-NEXT:    .reg .b16 %rs<2>;
143; SM70-NEXT:    .reg .b32 %r<20>;
144; SM70-NEXT:    .reg .b64 %rd<3>;
145; SM70-EMPTY:
146; SM70-NEXT:  // %bb.0:
147; SM70-NEXT:    ld.param.u16 %rs1, [relaxed_sys_i16_param_2];
148; SM70-NEXT:    ld.param.u64 %rd2, [relaxed_sys_i16_param_0];
149; SM70-NEXT:    and.b64 %rd1, %rd2, -4;
150; SM70-NEXT:    ld.param.u16 %r9, [relaxed_sys_i16_param_1];
151; SM70-NEXT:    cvt.u32.u64 %r10, %rd2;
152; SM70-NEXT:    and.b32 %r11, %r10, 3;
153; SM70-NEXT:    shl.b32 %r1, %r11, 3;
154; SM70-NEXT:    mov.b32 %r12, 65535;
155; SM70-NEXT:    shl.b32 %r13, %r12, %r1;
156; SM70-NEXT:    not.b32 %r2, %r13;
157; SM70-NEXT:    cvt.u32.u16 %r14, %rs1;
158; SM70-NEXT:    shl.b32 %r3, %r14, %r1;
159; SM70-NEXT:    shl.b32 %r4, %r9, %r1;
160; SM70-NEXT:    ld.u32 %r15, [%rd1];
161; SM70-NEXT:    and.b32 %r19, %r15, %r2;
162; SM70-NEXT:  $L__BB1_1: // %partword.cmpxchg.loop
163; SM70-NEXT:    // =>This Inner Loop Header: Depth=1
164; SM70-NEXT:    or.b32 %r16, %r19, %r3;
165; SM70-NEXT:    or.b32 %r17, %r19, %r4;
166; SM70-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
167; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
168; SM70-NEXT:    @%p1 bra $L__BB1_3;
169; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
170; SM70-NEXT:    // in Loop: Header=BB1_1 Depth=1
171; SM70-NEXT:    and.b32 %r8, %r7, %r2;
172; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
173; SM70-NEXT:    mov.u32 %r19, %r8;
174; SM70-NEXT:    @%p2 bra $L__BB1_1;
175; SM70-NEXT:  $L__BB1_3: // %partword.cmpxchg.end
176; SM70-NEXT:    st.param.b32 [func_retval0], %r14;
177; SM70-NEXT:    ret;
178  %pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new monotonic monotonic
179  ret i16 %new
180}
181
; relaxed_sys_i32: cmpxchg on an i32 with monotonic success and failure
; ordering. The function returns %new; the cmpxchg result (%pairold) is not
; used. Both RUN configurations expect a single atom.cas.b32 with no retry
; loop, followed by storing the %new parameter as the return value.
182; CHECK-LABEL: relaxed_sys_i32
183define i32 @relaxed_sys_i32(ptr %addr, i32 %cmp, i32 %new) {
184; SM30-LABEL: relaxed_sys_i32(
185; SM30:       {
186; SM30-NEXT:    .reg .b32 %r<4>;
187; SM30-NEXT:    .reg .b64 %rd<2>;
188; SM30-EMPTY:
189; SM30-NEXT:  // %bb.0:
190; SM30-NEXT:    ld.param.u64 %rd1, [relaxed_sys_i32_param_0];
191; SM30-NEXT:    ld.param.u32 %r1, [relaxed_sys_i32_param_1];
192; SM30-NEXT:    ld.param.u32 %r2, [relaxed_sys_i32_param_2];
193; SM30-NEXT:    atom.cas.b32 %r3, [%rd1], %r1, %r2;
194; SM30-NEXT:    st.param.b32 [func_retval0], %r2;
195; SM30-NEXT:    ret;
196;
197; SM70-LABEL: relaxed_sys_i32(
198; SM70:       {
199; SM70-NEXT:    .reg .b32 %r<4>;
200; SM70-NEXT:    .reg .b64 %rd<2>;
201; SM70-EMPTY:
202; SM70-NEXT:  // %bb.0:
203; SM70-NEXT:    ld.param.u64 %rd1, [relaxed_sys_i32_param_0];
204; SM70-NEXT:    ld.param.u32 %r1, [relaxed_sys_i32_param_1];
205; SM70-NEXT:    ld.param.u32 %r2, [relaxed_sys_i32_param_2];
206; SM70-NEXT:    atom.cas.b32 %r3, [%rd1], %r1, %r2;
207; SM70-NEXT:    st.param.b32 [func_retval0], %r2;
208; SM70-NEXT:    ret;
209  %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new monotonic monotonic
210  ret i32 %new
211}
212
; relaxed_sys_i64: cmpxchg on an i64 with monotonic success and failure
; ordering. The function returns %new; the cmpxchg result (%pairold) is not
; used. Both RUN configurations expect a single atom.cas.b64 with no retry
; loop, followed by storing the %new parameter as the return value.
213; CHECK-LABEL: relaxed_sys_i64
214define i64 @relaxed_sys_i64(ptr %addr, i64 %cmp, i64 %new) {
215; SM30-LABEL: relaxed_sys_i64(
216; SM30:       {
217; SM30-NEXT:    .reg .b64 %rd<5>;
218; SM30-EMPTY:
219; SM30-NEXT:  // %bb.0:
220; SM30-NEXT:    ld.param.u64 %rd1, [relaxed_sys_i64_param_0];
221; SM30-NEXT:    ld.param.u64 %rd2, [relaxed_sys_i64_param_1];
222; SM30-NEXT:    ld.param.u64 %rd3, [relaxed_sys_i64_param_2];
223; SM30-NEXT:    atom.cas.b64 %rd4, [%rd1], %rd2, %rd3;
224; SM30-NEXT:    st.param.b64 [func_retval0], %rd3;
225; SM30-NEXT:    ret;
226;
227; SM70-LABEL: relaxed_sys_i64(
228; SM70:       {
229; SM70-NEXT:    .reg .b64 %rd<5>;
230; SM70-EMPTY:
231; SM70-NEXT:  // %bb.0:
232; SM70-NEXT:    ld.param.u64 %rd1, [relaxed_sys_i64_param_0];
233; SM70-NEXT:    ld.param.u64 %rd2, [relaxed_sys_i64_param_1];
234; SM70-NEXT:    ld.param.u64 %rd3, [relaxed_sys_i64_param_2];
235; SM70-NEXT:    atom.cas.b64 %rd4, [%rd1], %rd2, %rd3;
236; SM70-NEXT:    st.param.b64 [func_retval0], %rd3;
237; SM70-NEXT:    ret;
238  %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new monotonic monotonic
239  ret i64 %new
240}
241;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
242; CHECK: {{.*}}
243