xref: /llvm-project/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll (revision 1d0370872f28ec9965448f33db1b105addaf64ae)
1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
4
5define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_intrinsic(ptr %ptr, double %data) {
6  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_intrinsic
7  ; GFX90A_GFX940: bb.0 (%ir-block.0):
8  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
9  ; GFX90A_GFX940-NEXT: {{  $}}
10  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
11  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
12  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
13  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
14  ; GFX90A_GFX940-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
15  ; GFX90A_GFX940-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
16  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
17  ; GFX90A_GFX940-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
18  ; GFX90A_GFX940-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
19  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
20  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
21  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
22  ; GFX90A_GFX940-NEXT:   FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s64) on %ir.ptr)
23  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
24  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data)
25  ret void
26}
27
28define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(ptr %ptr, double %data) {
29  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_intrinsic
30  ; GFX90A_GFX940: bb.0 (%ir-block.0):
31  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
32  ; GFX90A_GFX940-NEXT: {{  $}}
33  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
34  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
35  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
36  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
37  ; GFX90A_GFX940-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
38  ; GFX90A_GFX940-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
39  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
40  ; GFX90A_GFX940-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
41  ; GFX90A_GFX940-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
42  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
43  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
44  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
45  ; GFX90A_GFX940-NEXT:   [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s64) on %ir.ptr)
46  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
47  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
48  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY6]]
49  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY7]]
50  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
51  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data)
52  ret double %ret
53}
54
55define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) #0 {
56  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw
57  ; GFX90A_GFX940: bb.0 (%ir-block.0):
58  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
59  ; GFX90A_GFX940-NEXT: {{  $}}
60  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
61  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
62  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
63  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
64  ; GFX90A_GFX940-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
65  ; GFX90A_GFX940-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
66  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
67  ; GFX90A_GFX940-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
68  ; GFX90A_GFX940-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
69  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
70  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
71  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
72  ; GFX90A_GFX940-NEXT:   FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
73  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
74  %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
75  ret void
76}
77
78define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw_noprivate(ptr %ptr, double %data) #0 {
79  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw_noprivate
80  ; GFX90A_GFX940: bb.0 (%ir-block.0):
81  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
82  ; GFX90A_GFX940-NEXT: {{  $}}
83  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
84  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
85  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
86  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
87  ; GFX90A_GFX940-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
88  ; GFX90A_GFX940-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
89  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
90  ; GFX90A_GFX940-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
91  ; GFX90A_GFX940-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
92  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
93  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
94  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
95  ; GFX90A_GFX940-NEXT:   FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
96  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
97  %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
98  ret void
99}
100
101define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %data) #0 {
102  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw
103  ; GFX90A_GFX940: bb.0 (%ir-block.0):
104  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
105  ; GFX90A_GFX940-NEXT: {{  $}}
106  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
107  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
108  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
109  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
110  ; GFX90A_GFX940-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
111  ; GFX90A_GFX940-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
112  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
113  ; GFX90A_GFX940-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
114  ; GFX90A_GFX940-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
115  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
116  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
117  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
118  ; GFX90A_GFX940-NEXT:   [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
119  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
120  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
121  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY6]]
122  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY7]]
123  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
124  %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
125  ret double %ret
126}
127
128define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw__noprivate(ptr %ptr, double %data) #0 {
129  ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw__noprivate
130  ; GFX90A_GFX940: bb.0 (%ir-block.0):
131  ; GFX90A_GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
132  ; GFX90A_GFX940-NEXT: {{  $}}
133  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
134  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
135  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
136  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
137  ; GFX90A_GFX940-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
138  ; GFX90A_GFX940-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
139  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
140  ; GFX90A_GFX940-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
141  ; GFX90A_GFX940-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
142  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
143  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
144  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
145  ; GFX90A_GFX940-NEXT:   [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
146  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
147  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
148  ; GFX90A_GFX940-NEXT:   $sgpr0 = COPY [[COPY6]]
149  ; GFX90A_GFX940-NEXT:   $sgpr1 = COPY [[COPY7]]
150  ; GFX90A_GFX940-NEXT:   SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
151  %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
152  ret double %ret
153}
154
155declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr, double)
156
157attributes #0 = { nounwind }
158
159!0 = !{}
160!1 = !{i32 5, i32 6}
161