xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll (revision ba52f06f9d92c7ca04b440f618f8d352ea121fcc)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX900 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - %s | FileCheck -check-prefix=GFX90A %s
6; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10PLUS %s
7; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s | FileCheck -check-prefix=GFX10PLUS %s
8; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GFX12 %s
9
; Image atomic swap on a 1D image: calls llvm.amdgcn.image.atomic.swap.1d.i32.i32
; with %data, %s and the inreg <8 x i32> %rsrc, then bitcasts the i32 result to
; float and returns it.
; Every check block expects eight s_mov_b32 copies moving s2-s9 into s0-s7, the
; range the atomic then uses as s[0:7]. The atomic is expected to carry glc, or
; th:TH_ATOMIC_RETURN in the GFX12 checks, and to be followed by a wait.
; The GFX90A checks additionally expect v1 to be copied into v2, which then
; replaces v1 as the second operand of the atomic.
10define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
11; GFX6-LABEL: atomic_swap_i32_1d:
12; GFX6:       ; %bb.0: ; %main_body
13; GFX6-NEXT:    s_mov_b32 s0, s2
14; GFX6-NEXT:    s_mov_b32 s1, s3
15; GFX6-NEXT:    s_mov_b32 s2, s4
16; GFX6-NEXT:    s_mov_b32 s3, s5
17; GFX6-NEXT:    s_mov_b32 s4, s6
18; GFX6-NEXT:    s_mov_b32 s5, s7
19; GFX6-NEXT:    s_mov_b32 s6, s8
20; GFX6-NEXT:    s_mov_b32 s7, s9
21; GFX6-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
22; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
23; GFX6-NEXT:    ; return to shader part epilog
24;
25; GFX8-LABEL: atomic_swap_i32_1d:
26; GFX8:       ; %bb.0: ; %main_body
27; GFX8-NEXT:    s_mov_b32 s0, s2
28; GFX8-NEXT:    s_mov_b32 s1, s3
29; GFX8-NEXT:    s_mov_b32 s2, s4
30; GFX8-NEXT:    s_mov_b32 s3, s5
31; GFX8-NEXT:    s_mov_b32 s4, s6
32; GFX8-NEXT:    s_mov_b32 s5, s7
33; GFX8-NEXT:    s_mov_b32 s6, s8
34; GFX8-NEXT:    s_mov_b32 s7, s9
35; GFX8-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
36; GFX8-NEXT:    s_waitcnt vmcnt(0)
37; GFX8-NEXT:    ; return to shader part epilog
38;
39; GFX900-LABEL: atomic_swap_i32_1d:
40; GFX900:       ; %bb.0: ; %main_body
41; GFX900-NEXT:    s_mov_b32 s0, s2
42; GFX900-NEXT:    s_mov_b32 s1, s3
43; GFX900-NEXT:    s_mov_b32 s2, s4
44; GFX900-NEXT:    s_mov_b32 s3, s5
45; GFX900-NEXT:    s_mov_b32 s4, s6
46; GFX900-NEXT:    s_mov_b32 s5, s7
47; GFX900-NEXT:    s_mov_b32 s6, s8
48; GFX900-NEXT:    s_mov_b32 s7, s9
49; GFX900-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
50; GFX900-NEXT:    s_waitcnt vmcnt(0)
51; GFX900-NEXT:    ; return to shader part epilog
52;
53; GFX90A-LABEL: atomic_swap_i32_1d:
54; GFX90A:       ; %bb.0: ; %main_body
55; GFX90A-NEXT:    s_mov_b32 s0, s2
56; GFX90A-NEXT:    s_mov_b32 s1, s3
57; GFX90A-NEXT:    s_mov_b32 s2, s4
58; GFX90A-NEXT:    s_mov_b32 s3, s5
59; GFX90A-NEXT:    s_mov_b32 s4, s6
60; GFX90A-NEXT:    s_mov_b32 s5, s7
61; GFX90A-NEXT:    s_mov_b32 s6, s8
62; GFX90A-NEXT:    s_mov_b32 s7, s9
63; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
64; GFX90A-NEXT:    image_atomic_swap v0, v2, s[0:7] dmask:0x1 unorm glc
65; GFX90A-NEXT:    s_waitcnt vmcnt(0)
66; GFX90A-NEXT:    ; return to shader part epilog
67;
68; GFX10PLUS-LABEL: atomic_swap_i32_1d:
69; GFX10PLUS:       ; %bb.0: ; %main_body
70; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
71; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
72; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
73; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
74; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
75; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
76; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
77; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
78; GFX10PLUS-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
79; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
80; GFX10PLUS-NEXT:    ; return to shader part epilog
81;
82; GFX12-LABEL: atomic_swap_i32_1d:
83; GFX12:       ; %bb.0: ; %main_body
84; GFX12-NEXT:    s_mov_b32 s0, s2
85; GFX12-NEXT:    s_mov_b32 s1, s3
86; GFX12-NEXT:    s_mov_b32 s2, s4
87; GFX12-NEXT:    s_mov_b32 s3, s5
88; GFX12-NEXT:    s_mov_b32 s4, s6
89; GFX12-NEXT:    s_mov_b32 s5, s7
90; GFX12-NEXT:    s_mov_b32 s6, s8
91; GFX12-NEXT:    s_mov_b32 s7, s9
92; GFX12-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
93; GFX12-NEXT:    s_wait_loadcnt 0x0
94; GFX12-NEXT:    ; return to shader part epilog
95main_body:
  ; NOTE(review): the two trailing i32 0 operands are presumably the
  ; texfailctrl and cachepolicy immediates - confirm against the intrinsic
  ; definition.
96  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
97  %out = bitcast i32 %v to float
98  ret float %out
99}
100
; Image atomic add on a 1D image: calls llvm.amdgcn.image.atomic.add.1d.i32.i32
; with %data, %s and the inreg <8 x i32> %rsrc, then bitcasts the i32 result to
; float and returns it.
; Every check block expects eight s_mov_b32 copies moving s2-s9 into s0-s7, the
; range the atomic then uses as s[0:7]. The expected mnemonic is
; image_atomic_add with glc, except in the GFX12 checks, which expect
; image_atomic_add_uint with th:TH_ATOMIC_RETURN.
; The GFX90A checks additionally expect v1 to be copied into v2, which then
; replaces v1 as the second operand of the atomic.
101define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
102; GFX6-LABEL: atomic_add_i32_1d:
103; GFX6:       ; %bb.0: ; %main_body
104; GFX6-NEXT:    s_mov_b32 s0, s2
105; GFX6-NEXT:    s_mov_b32 s1, s3
106; GFX6-NEXT:    s_mov_b32 s2, s4
107; GFX6-NEXT:    s_mov_b32 s3, s5
108; GFX6-NEXT:    s_mov_b32 s4, s6
109; GFX6-NEXT:    s_mov_b32 s5, s7
110; GFX6-NEXT:    s_mov_b32 s6, s8
111; GFX6-NEXT:    s_mov_b32 s7, s9
112; GFX6-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
113; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
114; GFX6-NEXT:    ; return to shader part epilog
115;
116; GFX8-LABEL: atomic_add_i32_1d:
117; GFX8:       ; %bb.0: ; %main_body
118; GFX8-NEXT:    s_mov_b32 s0, s2
119; GFX8-NEXT:    s_mov_b32 s1, s3
120; GFX8-NEXT:    s_mov_b32 s2, s4
121; GFX8-NEXT:    s_mov_b32 s3, s5
122; GFX8-NEXT:    s_mov_b32 s4, s6
123; GFX8-NEXT:    s_mov_b32 s5, s7
124; GFX8-NEXT:    s_mov_b32 s6, s8
125; GFX8-NEXT:    s_mov_b32 s7, s9
126; GFX8-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
127; GFX8-NEXT:    s_waitcnt vmcnt(0)
128; GFX8-NEXT:    ; return to shader part epilog
129;
130; GFX900-LABEL: atomic_add_i32_1d:
131; GFX900:       ; %bb.0: ; %main_body
132; GFX900-NEXT:    s_mov_b32 s0, s2
133; GFX900-NEXT:    s_mov_b32 s1, s3
134; GFX900-NEXT:    s_mov_b32 s2, s4
135; GFX900-NEXT:    s_mov_b32 s3, s5
136; GFX900-NEXT:    s_mov_b32 s4, s6
137; GFX900-NEXT:    s_mov_b32 s5, s7
138; GFX900-NEXT:    s_mov_b32 s6, s8
139; GFX900-NEXT:    s_mov_b32 s7, s9
140; GFX900-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
141; GFX900-NEXT:    s_waitcnt vmcnt(0)
142; GFX900-NEXT:    ; return to shader part epilog
143;
144; GFX90A-LABEL: atomic_add_i32_1d:
145; GFX90A:       ; %bb.0: ; %main_body
146; GFX90A-NEXT:    s_mov_b32 s0, s2
147; GFX90A-NEXT:    s_mov_b32 s1, s3
148; GFX90A-NEXT:    s_mov_b32 s2, s4
149; GFX90A-NEXT:    s_mov_b32 s3, s5
150; GFX90A-NEXT:    s_mov_b32 s4, s6
151; GFX90A-NEXT:    s_mov_b32 s5, s7
152; GFX90A-NEXT:    s_mov_b32 s6, s8
153; GFX90A-NEXT:    s_mov_b32 s7, s9
154; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
155; GFX90A-NEXT:    image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc
156; GFX90A-NEXT:    s_waitcnt vmcnt(0)
157; GFX90A-NEXT:    ; return to shader part epilog
158;
159; GFX10PLUS-LABEL: atomic_add_i32_1d:
160; GFX10PLUS:       ; %bb.0: ; %main_body
161; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
162; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
163; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
164; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
165; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
166; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
167; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
168; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
169; GFX10PLUS-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
170; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
171; GFX10PLUS-NEXT:    ; return to shader part epilog
172;
173; GFX12-LABEL: atomic_add_i32_1d:
174; GFX12:       ; %bb.0: ; %main_body
175; GFX12-NEXT:    s_mov_b32 s0, s2
176; GFX12-NEXT:    s_mov_b32 s1, s3
177; GFX12-NEXT:    s_mov_b32 s2, s4
178; GFX12-NEXT:    s_mov_b32 s3, s5
179; GFX12-NEXT:    s_mov_b32 s4, s6
180; GFX12-NEXT:    s_mov_b32 s5, s7
181; GFX12-NEXT:    s_mov_b32 s6, s8
182; GFX12-NEXT:    s_mov_b32 s7, s9
183; GFX12-NEXT:    image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
184; GFX12-NEXT:    s_wait_loadcnt 0x0
185; GFX12-NEXT:    ; return to shader part epilog
186main_body:
  ; NOTE(review): the two trailing i32 0 operands are presumably the
  ; texfailctrl and cachepolicy immediates - confirm against the intrinsic
  ; definition.
187  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
188  %out = bitcast i32 %v to float
189  ret float %out
190}
191
; Image atomic sub on a 1D image: calls llvm.amdgcn.image.atomic.sub.1d.i32.i32
; with %data, %s and the inreg <8 x i32> %rsrc, then bitcasts the i32 result to
; float and returns it.
; Every check block expects eight s_mov_b32 copies moving s2-s9 into s0-s7, the
; range the atomic then uses as s[0:7]. The expected mnemonic is
; image_atomic_sub with glc, except in the GFX12 checks, which expect
; image_atomic_sub_uint with th:TH_ATOMIC_RETURN.
; The GFX90A checks additionally expect v1 to be copied into v2, which then
; replaces v1 as the second operand of the atomic.
192define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
193; GFX6-LABEL: atomic_sub_i32_1d:
194; GFX6:       ; %bb.0: ; %main_body
195; GFX6-NEXT:    s_mov_b32 s0, s2
196; GFX6-NEXT:    s_mov_b32 s1, s3
197; GFX6-NEXT:    s_mov_b32 s2, s4
198; GFX6-NEXT:    s_mov_b32 s3, s5
199; GFX6-NEXT:    s_mov_b32 s4, s6
200; GFX6-NEXT:    s_mov_b32 s5, s7
201; GFX6-NEXT:    s_mov_b32 s6, s8
202; GFX6-NEXT:    s_mov_b32 s7, s9
203; GFX6-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
204; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
205; GFX6-NEXT:    ; return to shader part epilog
206;
207; GFX8-LABEL: atomic_sub_i32_1d:
208; GFX8:       ; %bb.0: ; %main_body
209; GFX8-NEXT:    s_mov_b32 s0, s2
210; GFX8-NEXT:    s_mov_b32 s1, s3
211; GFX8-NEXT:    s_mov_b32 s2, s4
212; GFX8-NEXT:    s_mov_b32 s3, s5
213; GFX8-NEXT:    s_mov_b32 s4, s6
214; GFX8-NEXT:    s_mov_b32 s5, s7
215; GFX8-NEXT:    s_mov_b32 s6, s8
216; GFX8-NEXT:    s_mov_b32 s7, s9
217; GFX8-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
218; GFX8-NEXT:    s_waitcnt vmcnt(0)
219; GFX8-NEXT:    ; return to shader part epilog
220;
221; GFX900-LABEL: atomic_sub_i32_1d:
222; GFX900:       ; %bb.0: ; %main_body
223; GFX900-NEXT:    s_mov_b32 s0, s2
224; GFX900-NEXT:    s_mov_b32 s1, s3
225; GFX900-NEXT:    s_mov_b32 s2, s4
226; GFX900-NEXT:    s_mov_b32 s3, s5
227; GFX900-NEXT:    s_mov_b32 s4, s6
228; GFX900-NEXT:    s_mov_b32 s5, s7
229; GFX900-NEXT:    s_mov_b32 s6, s8
230; GFX900-NEXT:    s_mov_b32 s7, s9
231; GFX900-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
232; GFX900-NEXT:    s_waitcnt vmcnt(0)
233; GFX900-NEXT:    ; return to shader part epilog
234;
235; GFX90A-LABEL: atomic_sub_i32_1d:
236; GFX90A:       ; %bb.0: ; %main_body
237; GFX90A-NEXT:    s_mov_b32 s0, s2
238; GFX90A-NEXT:    s_mov_b32 s1, s3
239; GFX90A-NEXT:    s_mov_b32 s2, s4
240; GFX90A-NEXT:    s_mov_b32 s3, s5
241; GFX90A-NEXT:    s_mov_b32 s4, s6
242; GFX90A-NEXT:    s_mov_b32 s5, s7
243; GFX90A-NEXT:    s_mov_b32 s6, s8
244; GFX90A-NEXT:    s_mov_b32 s7, s9
245; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
246; GFX90A-NEXT:    image_atomic_sub v0, v2, s[0:7] dmask:0x1 unorm glc
247; GFX90A-NEXT:    s_waitcnt vmcnt(0)
248; GFX90A-NEXT:    ; return to shader part epilog
249;
250; GFX10PLUS-LABEL: atomic_sub_i32_1d:
251; GFX10PLUS:       ; %bb.0: ; %main_body
252; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
253; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
254; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
255; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
256; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
257; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
258; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
259; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
260; GFX10PLUS-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
261; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
262; GFX10PLUS-NEXT:    ; return to shader part epilog
263;
264; GFX12-LABEL: atomic_sub_i32_1d:
265; GFX12:       ; %bb.0: ; %main_body
266; GFX12-NEXT:    s_mov_b32 s0, s2
267; GFX12-NEXT:    s_mov_b32 s1, s3
268; GFX12-NEXT:    s_mov_b32 s2, s4
269; GFX12-NEXT:    s_mov_b32 s3, s5
270; GFX12-NEXT:    s_mov_b32 s4, s6
271; GFX12-NEXT:    s_mov_b32 s5, s7
272; GFX12-NEXT:    s_mov_b32 s6, s8
273; GFX12-NEXT:    s_mov_b32 s7, s9
274; GFX12-NEXT:    image_atomic_sub_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
275; GFX12-NEXT:    s_wait_loadcnt 0x0
276; GFX12-NEXT:    ; return to shader part epilog
277main_body:
  ; NOTE(review): the two trailing i32 0 operands are presumably the
  ; texfailctrl and cachepolicy immediates - confirm against the intrinsic
  ; definition.
278  %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
279  %out = bitcast i32 %v to float
280  ret float %out
281}
282
; Image atomic signed-min on a 1D image: calls
; llvm.amdgcn.image.atomic.smin.1d.i32.i32 with %data, %s and the inreg
; <8 x i32> %rsrc, then bitcasts the i32 result to float and returns it.
; Every check block expects eight s_mov_b32 copies moving s2-s9 into s0-s7, the
; range the atomic then uses as s[0:7]. The expected mnemonic is
; image_atomic_smin with glc, except in the GFX12 checks, which expect
; image_atomic_min_int with th:TH_ATOMIC_RETURN.
; The GFX90A checks additionally expect v1 to be copied into v2, which then
; replaces v1 as the second operand of the atomic.
283define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
284; GFX6-LABEL: atomic_smin_i32_1d:
285; GFX6:       ; %bb.0: ; %main_body
286; GFX6-NEXT:    s_mov_b32 s0, s2
287; GFX6-NEXT:    s_mov_b32 s1, s3
288; GFX6-NEXT:    s_mov_b32 s2, s4
289; GFX6-NEXT:    s_mov_b32 s3, s5
290; GFX6-NEXT:    s_mov_b32 s4, s6
291; GFX6-NEXT:    s_mov_b32 s5, s7
292; GFX6-NEXT:    s_mov_b32 s6, s8
293; GFX6-NEXT:    s_mov_b32 s7, s9
294; GFX6-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
295; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
296; GFX6-NEXT:    ; return to shader part epilog
297;
298; GFX8-LABEL: atomic_smin_i32_1d:
299; GFX8:       ; %bb.0: ; %main_body
300; GFX8-NEXT:    s_mov_b32 s0, s2
301; GFX8-NEXT:    s_mov_b32 s1, s3
302; GFX8-NEXT:    s_mov_b32 s2, s4
303; GFX8-NEXT:    s_mov_b32 s3, s5
304; GFX8-NEXT:    s_mov_b32 s4, s6
305; GFX8-NEXT:    s_mov_b32 s5, s7
306; GFX8-NEXT:    s_mov_b32 s6, s8
307; GFX8-NEXT:    s_mov_b32 s7, s9
308; GFX8-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
309; GFX8-NEXT:    s_waitcnt vmcnt(0)
310; GFX8-NEXT:    ; return to shader part epilog
311;
312; GFX900-LABEL: atomic_smin_i32_1d:
313; GFX900:       ; %bb.0: ; %main_body
314; GFX900-NEXT:    s_mov_b32 s0, s2
315; GFX900-NEXT:    s_mov_b32 s1, s3
316; GFX900-NEXT:    s_mov_b32 s2, s4
317; GFX900-NEXT:    s_mov_b32 s3, s5
318; GFX900-NEXT:    s_mov_b32 s4, s6
319; GFX900-NEXT:    s_mov_b32 s5, s7
320; GFX900-NEXT:    s_mov_b32 s6, s8
321; GFX900-NEXT:    s_mov_b32 s7, s9
322; GFX900-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
323; GFX900-NEXT:    s_waitcnt vmcnt(0)
324; GFX900-NEXT:    ; return to shader part epilog
325;
326; GFX90A-LABEL: atomic_smin_i32_1d:
327; GFX90A:       ; %bb.0: ; %main_body
328; GFX90A-NEXT:    s_mov_b32 s0, s2
329; GFX90A-NEXT:    s_mov_b32 s1, s3
330; GFX90A-NEXT:    s_mov_b32 s2, s4
331; GFX90A-NEXT:    s_mov_b32 s3, s5
332; GFX90A-NEXT:    s_mov_b32 s4, s6
333; GFX90A-NEXT:    s_mov_b32 s5, s7
334; GFX90A-NEXT:    s_mov_b32 s6, s8
335; GFX90A-NEXT:    s_mov_b32 s7, s9
336; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
337; GFX90A-NEXT:    image_atomic_smin v0, v2, s[0:7] dmask:0x1 unorm glc
338; GFX90A-NEXT:    s_waitcnt vmcnt(0)
339; GFX90A-NEXT:    ; return to shader part epilog
340;
341; GFX10PLUS-LABEL: atomic_smin_i32_1d:
342; GFX10PLUS:       ; %bb.0: ; %main_body
343; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
344; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
345; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
346; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
347; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
348; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
349; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
350; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
351; GFX10PLUS-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
352; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
353; GFX10PLUS-NEXT:    ; return to shader part epilog
354;
355; GFX12-LABEL: atomic_smin_i32_1d:
356; GFX12:       ; %bb.0: ; %main_body
357; GFX12-NEXT:    s_mov_b32 s0, s2
358; GFX12-NEXT:    s_mov_b32 s1, s3
359; GFX12-NEXT:    s_mov_b32 s2, s4
360; GFX12-NEXT:    s_mov_b32 s3, s5
361; GFX12-NEXT:    s_mov_b32 s4, s6
362; GFX12-NEXT:    s_mov_b32 s5, s7
363; GFX12-NEXT:    s_mov_b32 s6, s8
364; GFX12-NEXT:    s_mov_b32 s7, s9
365; GFX12-NEXT:    image_atomic_min_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
366; GFX12-NEXT:    s_wait_loadcnt 0x0
367; GFX12-NEXT:    ; return to shader part epilog
368main_body:
  ; NOTE(review): the two trailing i32 0 operands are presumably the
  ; texfailctrl and cachepolicy immediates - confirm against the intrinsic
  ; definition.
369  %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
370  %out = bitcast i32 %v to float
371  ret float %out
372}
373
; Image atomic unsigned-min on a 1D image: calls
; llvm.amdgcn.image.atomic.umin.1d.i32.i32 with %data, %s and the inreg
; <8 x i32> %rsrc, then bitcasts the i32 result to float and returns it.
; Every check block expects eight s_mov_b32 copies moving s2-s9 into s0-s7, the
; range the atomic then uses as s[0:7]. The expected mnemonic is
; image_atomic_umin with glc, except in the GFX12 checks, which expect
; image_atomic_min_uint with th:TH_ATOMIC_RETURN.
; The GFX90A checks additionally expect v1 to be copied into v2, which then
; replaces v1 as the second operand of the atomic.
374define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
375; GFX6-LABEL: atomic_umin_i32_1d:
376; GFX6:       ; %bb.0: ; %main_body
377; GFX6-NEXT:    s_mov_b32 s0, s2
378; GFX6-NEXT:    s_mov_b32 s1, s3
379; GFX6-NEXT:    s_mov_b32 s2, s4
380; GFX6-NEXT:    s_mov_b32 s3, s5
381; GFX6-NEXT:    s_mov_b32 s4, s6
382; GFX6-NEXT:    s_mov_b32 s5, s7
383; GFX6-NEXT:    s_mov_b32 s6, s8
384; GFX6-NEXT:    s_mov_b32 s7, s9
385; GFX6-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
386; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
387; GFX6-NEXT:    ; return to shader part epilog
388;
389; GFX8-LABEL: atomic_umin_i32_1d:
390; GFX8:       ; %bb.0: ; %main_body
391; GFX8-NEXT:    s_mov_b32 s0, s2
392; GFX8-NEXT:    s_mov_b32 s1, s3
393; GFX8-NEXT:    s_mov_b32 s2, s4
394; GFX8-NEXT:    s_mov_b32 s3, s5
395; GFX8-NEXT:    s_mov_b32 s4, s6
396; GFX8-NEXT:    s_mov_b32 s5, s7
397; GFX8-NEXT:    s_mov_b32 s6, s8
398; GFX8-NEXT:    s_mov_b32 s7, s9
399; GFX8-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
400; GFX8-NEXT:    s_waitcnt vmcnt(0)
401; GFX8-NEXT:    ; return to shader part epilog
402;
403; GFX900-LABEL: atomic_umin_i32_1d:
404; GFX900:       ; %bb.0: ; %main_body
405; GFX900-NEXT:    s_mov_b32 s0, s2
406; GFX900-NEXT:    s_mov_b32 s1, s3
407; GFX900-NEXT:    s_mov_b32 s2, s4
408; GFX900-NEXT:    s_mov_b32 s3, s5
409; GFX900-NEXT:    s_mov_b32 s4, s6
410; GFX900-NEXT:    s_mov_b32 s5, s7
411; GFX900-NEXT:    s_mov_b32 s6, s8
412; GFX900-NEXT:    s_mov_b32 s7, s9
413; GFX900-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
414; GFX900-NEXT:    s_waitcnt vmcnt(0)
415; GFX900-NEXT:    ; return to shader part epilog
416;
417; GFX90A-LABEL: atomic_umin_i32_1d:
418; GFX90A:       ; %bb.0: ; %main_body
419; GFX90A-NEXT:    s_mov_b32 s0, s2
420; GFX90A-NEXT:    s_mov_b32 s1, s3
421; GFX90A-NEXT:    s_mov_b32 s2, s4
422; GFX90A-NEXT:    s_mov_b32 s3, s5
423; GFX90A-NEXT:    s_mov_b32 s4, s6
424; GFX90A-NEXT:    s_mov_b32 s5, s7
425; GFX90A-NEXT:    s_mov_b32 s6, s8
426; GFX90A-NEXT:    s_mov_b32 s7, s9
427; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
428; GFX90A-NEXT:    image_atomic_umin v0, v2, s[0:7] dmask:0x1 unorm glc
429; GFX90A-NEXT:    s_waitcnt vmcnt(0)
430; GFX90A-NEXT:    ; return to shader part epilog
431;
432; GFX10PLUS-LABEL: atomic_umin_i32_1d:
433; GFX10PLUS:       ; %bb.0: ; %main_body
434; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
435; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
436; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
437; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
438; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
439; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
440; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
441; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
442; GFX10PLUS-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
443; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
444; GFX10PLUS-NEXT:    ; return to shader part epilog
445;
446; GFX12-LABEL: atomic_umin_i32_1d:
447; GFX12:       ; %bb.0: ; %main_body
448; GFX12-NEXT:    s_mov_b32 s0, s2
449; GFX12-NEXT:    s_mov_b32 s1, s3
450; GFX12-NEXT:    s_mov_b32 s2, s4
451; GFX12-NEXT:    s_mov_b32 s3, s5
452; GFX12-NEXT:    s_mov_b32 s4, s6
453; GFX12-NEXT:    s_mov_b32 s5, s7
454; GFX12-NEXT:    s_mov_b32 s6, s8
455; GFX12-NEXT:    s_mov_b32 s7, s9
456; GFX12-NEXT:    image_atomic_min_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
457; GFX12-NEXT:    s_wait_loadcnt 0x0
458; GFX12-NEXT:    ; return to shader part epilog
459main_body:
  ; NOTE(review): the two trailing i32 0 operands are presumably the
  ; texfailctrl and cachepolicy immediates - confirm against the intrinsic
  ; definition.
460  %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
461  %out = bitcast i32 %v to float
462  ret float %out
463}
464
; Image atomic signed-max on a 1D image: calls
; llvm.amdgcn.image.atomic.smax.1d.i32.i32 with %data, %s and the inreg
; <8 x i32> %rsrc, then bitcasts the i32 result to float and returns it.
; Every check block expects eight s_mov_b32 copies moving s2-s9 into s0-s7, the
; range the atomic then uses as s[0:7]. The expected mnemonic is
; image_atomic_smax with glc, except in the GFX12 checks, which expect
; image_atomic_max_int with th:TH_ATOMIC_RETURN.
; The GFX90A checks additionally expect v1 to be copied into v2, which then
; replaces v1 as the second operand of the atomic.
465define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
466; GFX6-LABEL: atomic_smax_i32_1d:
467; GFX6:       ; %bb.0: ; %main_body
468; GFX6-NEXT:    s_mov_b32 s0, s2
469; GFX6-NEXT:    s_mov_b32 s1, s3
470; GFX6-NEXT:    s_mov_b32 s2, s4
471; GFX6-NEXT:    s_mov_b32 s3, s5
472; GFX6-NEXT:    s_mov_b32 s4, s6
473; GFX6-NEXT:    s_mov_b32 s5, s7
474; GFX6-NEXT:    s_mov_b32 s6, s8
475; GFX6-NEXT:    s_mov_b32 s7, s9
476; GFX6-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
477; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
478; GFX6-NEXT:    ; return to shader part epilog
479;
480; GFX8-LABEL: atomic_smax_i32_1d:
481; GFX8:       ; %bb.0: ; %main_body
482; GFX8-NEXT:    s_mov_b32 s0, s2
483; GFX8-NEXT:    s_mov_b32 s1, s3
484; GFX8-NEXT:    s_mov_b32 s2, s4
485; GFX8-NEXT:    s_mov_b32 s3, s5
486; GFX8-NEXT:    s_mov_b32 s4, s6
487; GFX8-NEXT:    s_mov_b32 s5, s7
488; GFX8-NEXT:    s_mov_b32 s6, s8
489; GFX8-NEXT:    s_mov_b32 s7, s9
490; GFX8-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
491; GFX8-NEXT:    s_waitcnt vmcnt(0)
492; GFX8-NEXT:    ; return to shader part epilog
493;
494; GFX900-LABEL: atomic_smax_i32_1d:
495; GFX900:       ; %bb.0: ; %main_body
496; GFX900-NEXT:    s_mov_b32 s0, s2
497; GFX900-NEXT:    s_mov_b32 s1, s3
498; GFX900-NEXT:    s_mov_b32 s2, s4
499; GFX900-NEXT:    s_mov_b32 s3, s5
500; GFX900-NEXT:    s_mov_b32 s4, s6
501; GFX900-NEXT:    s_mov_b32 s5, s7
502; GFX900-NEXT:    s_mov_b32 s6, s8
503; GFX900-NEXT:    s_mov_b32 s7, s9
504; GFX900-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
505; GFX900-NEXT:    s_waitcnt vmcnt(0)
506; GFX900-NEXT:    ; return to shader part epilog
507;
508; GFX90A-LABEL: atomic_smax_i32_1d:
509; GFX90A:       ; %bb.0: ; %main_body
510; GFX90A-NEXT:    s_mov_b32 s0, s2
511; GFX90A-NEXT:    s_mov_b32 s1, s3
512; GFX90A-NEXT:    s_mov_b32 s2, s4
513; GFX90A-NEXT:    s_mov_b32 s3, s5
514; GFX90A-NEXT:    s_mov_b32 s4, s6
515; GFX90A-NEXT:    s_mov_b32 s5, s7
516; GFX90A-NEXT:    s_mov_b32 s6, s8
517; GFX90A-NEXT:    s_mov_b32 s7, s9
518; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
519; GFX90A-NEXT:    image_atomic_smax v0, v2, s[0:7] dmask:0x1 unorm glc
520; GFX90A-NEXT:    s_waitcnt vmcnt(0)
521; GFX90A-NEXT:    ; return to shader part epilog
522;
523; GFX10PLUS-LABEL: atomic_smax_i32_1d:
524; GFX10PLUS:       ; %bb.0: ; %main_body
525; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
526; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
527; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
528; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
529; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
530; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
531; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
532; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
533; GFX10PLUS-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
534; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
535; GFX10PLUS-NEXT:    ; return to shader part epilog
536;
537; GFX12-LABEL: atomic_smax_i32_1d:
538; GFX12:       ; %bb.0: ; %main_body
539; GFX12-NEXT:    s_mov_b32 s0, s2
540; GFX12-NEXT:    s_mov_b32 s1, s3
541; GFX12-NEXT:    s_mov_b32 s2, s4
542; GFX12-NEXT:    s_mov_b32 s3, s5
543; GFX12-NEXT:    s_mov_b32 s4, s6
544; GFX12-NEXT:    s_mov_b32 s5, s7
545; GFX12-NEXT:    s_mov_b32 s6, s8
546; GFX12-NEXT:    s_mov_b32 s7, s9
547; GFX12-NEXT:    image_atomic_max_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
548; GFX12-NEXT:    s_wait_loadcnt 0x0
549; GFX12-NEXT:    ; return to shader part epilog
550main_body:
  ; NOTE(review): the two trailing i32 0 operands are presumably the
  ; texfailctrl and cachepolicy immediates - confirm against the intrinsic
  ; definition.
551  %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
552  %out = bitcast i32 %v to float
553  ret float %out
554}
555
; Image atomic unsigned-max on a 1D image: calls
; llvm.amdgcn.image.atomic.umax.1d.i32.i32 with %data, %s and the inreg
; <8 x i32> %rsrc, then bitcasts the i32 result to float and returns it.
; Every check block expects eight s_mov_b32 copies moving s2-s9 into s0-s7, the
; range the atomic then uses as s[0:7]. The expected mnemonic is
; image_atomic_umax with glc, except in the GFX12 checks, which expect
; image_atomic_max_uint with th:TH_ATOMIC_RETURN.
; The GFX90A checks additionally expect v1 to be copied into v2, which then
; replaces v1 as the second operand of the atomic.
556define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
557; GFX6-LABEL: atomic_umax_i32_1d:
558; GFX6:       ; %bb.0: ; %main_body
559; GFX6-NEXT:    s_mov_b32 s0, s2
560; GFX6-NEXT:    s_mov_b32 s1, s3
561; GFX6-NEXT:    s_mov_b32 s2, s4
562; GFX6-NEXT:    s_mov_b32 s3, s5
563; GFX6-NEXT:    s_mov_b32 s4, s6
564; GFX6-NEXT:    s_mov_b32 s5, s7
565; GFX6-NEXT:    s_mov_b32 s6, s8
566; GFX6-NEXT:    s_mov_b32 s7, s9
567; GFX6-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
568; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
569; GFX6-NEXT:    ; return to shader part epilog
570;
571; GFX8-LABEL: atomic_umax_i32_1d:
572; GFX8:       ; %bb.0: ; %main_body
573; GFX8-NEXT:    s_mov_b32 s0, s2
574; GFX8-NEXT:    s_mov_b32 s1, s3
575; GFX8-NEXT:    s_mov_b32 s2, s4
576; GFX8-NEXT:    s_mov_b32 s3, s5
577; GFX8-NEXT:    s_mov_b32 s4, s6
578; GFX8-NEXT:    s_mov_b32 s5, s7
579; GFX8-NEXT:    s_mov_b32 s6, s8
580; GFX8-NEXT:    s_mov_b32 s7, s9
581; GFX8-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
582; GFX8-NEXT:    s_waitcnt vmcnt(0)
583; GFX8-NEXT:    ; return to shader part epilog
584;
585; GFX900-LABEL: atomic_umax_i32_1d:
586; GFX900:       ; %bb.0: ; %main_body
587; GFX900-NEXT:    s_mov_b32 s0, s2
588; GFX900-NEXT:    s_mov_b32 s1, s3
589; GFX900-NEXT:    s_mov_b32 s2, s4
590; GFX900-NEXT:    s_mov_b32 s3, s5
591; GFX900-NEXT:    s_mov_b32 s4, s6
592; GFX900-NEXT:    s_mov_b32 s5, s7
593; GFX900-NEXT:    s_mov_b32 s6, s8
594; GFX900-NEXT:    s_mov_b32 s7, s9
595; GFX900-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
596; GFX900-NEXT:    s_waitcnt vmcnt(0)
597; GFX900-NEXT:    ; return to shader part epilog
598;
599; GFX90A-LABEL: atomic_umax_i32_1d:
600; GFX90A:       ; %bb.0: ; %main_body
601; GFX90A-NEXT:    s_mov_b32 s0, s2
602; GFX90A-NEXT:    s_mov_b32 s1, s3
603; GFX90A-NEXT:    s_mov_b32 s2, s4
604; GFX90A-NEXT:    s_mov_b32 s3, s5
605; GFX90A-NEXT:    s_mov_b32 s4, s6
606; GFX90A-NEXT:    s_mov_b32 s5, s7
607; GFX90A-NEXT:    s_mov_b32 s6, s8
608; GFX90A-NEXT:    s_mov_b32 s7, s9
609; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
610; GFX90A-NEXT:    image_atomic_umax v0, v2, s[0:7] dmask:0x1 unorm glc
611; GFX90A-NEXT:    s_waitcnt vmcnt(0)
612; GFX90A-NEXT:    ; return to shader part epilog
613;
614; GFX10PLUS-LABEL: atomic_umax_i32_1d:
615; GFX10PLUS:       ; %bb.0: ; %main_body
616; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
617; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
618; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
619; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
620; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
621; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
622; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
623; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
624; GFX10PLUS-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
625; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
626; GFX10PLUS-NEXT:    ; return to shader part epilog
627;
628; GFX12-LABEL: atomic_umax_i32_1d:
629; GFX12:       ; %bb.0: ; %main_body
630; GFX12-NEXT:    s_mov_b32 s0, s2
631; GFX12-NEXT:    s_mov_b32 s1, s3
632; GFX12-NEXT:    s_mov_b32 s2, s4
633; GFX12-NEXT:    s_mov_b32 s3, s5
634; GFX12-NEXT:    s_mov_b32 s4, s6
635; GFX12-NEXT:    s_mov_b32 s5, s7
636; GFX12-NEXT:    s_mov_b32 s6, s8
637; GFX12-NEXT:    s_mov_b32 s7, s9
638; GFX12-NEXT:    image_atomic_max_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
639; GFX12-NEXT:    s_wait_loadcnt 0x0
640; GFX12-NEXT:    ; return to shader part epilog
641main_body:
  ; NOTE(review): the two trailing i32 0 operands are presumably the
  ; texfailctrl and cachepolicy immediates - confirm against the intrinsic
  ; definition.
642  %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
643  %out = bitcast i32 %v to float
644  ret float %out
645}
646
; Image atomic and on a 1D image: calls llvm.amdgcn.image.atomic.and.1d.i32.i32
; with %data, %s and the inreg <8 x i32> %rsrc, then bitcasts the i32 result to
; float and returns it.
; Every check block expects eight s_mov_b32 copies moving s2-s9 into s0-s7, the
; range the atomic then uses as s[0:7]. The expected mnemonic is
; image_atomic_and in every check block, carrying glc, or th:TH_ATOMIC_RETURN
; in the GFX12 checks.
; The GFX90A checks additionally expect v1 to be copied into v2, which then
; replaces v1 as the second operand of the atomic.
647define amdgpu_ps float @atomic_and_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
648; GFX6-LABEL: atomic_and_i32_1d:
649; GFX6:       ; %bb.0: ; %main_body
650; GFX6-NEXT:    s_mov_b32 s0, s2
651; GFX6-NEXT:    s_mov_b32 s1, s3
652; GFX6-NEXT:    s_mov_b32 s2, s4
653; GFX6-NEXT:    s_mov_b32 s3, s5
654; GFX6-NEXT:    s_mov_b32 s4, s6
655; GFX6-NEXT:    s_mov_b32 s5, s7
656; GFX6-NEXT:    s_mov_b32 s6, s8
657; GFX6-NEXT:    s_mov_b32 s7, s9
658; GFX6-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
659; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
660; GFX6-NEXT:    ; return to shader part epilog
661;
662; GFX8-LABEL: atomic_and_i32_1d:
663; GFX8:       ; %bb.0: ; %main_body
664; GFX8-NEXT:    s_mov_b32 s0, s2
665; GFX8-NEXT:    s_mov_b32 s1, s3
666; GFX8-NEXT:    s_mov_b32 s2, s4
667; GFX8-NEXT:    s_mov_b32 s3, s5
668; GFX8-NEXT:    s_mov_b32 s4, s6
669; GFX8-NEXT:    s_mov_b32 s5, s7
670; GFX8-NEXT:    s_mov_b32 s6, s8
671; GFX8-NEXT:    s_mov_b32 s7, s9
672; GFX8-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
673; GFX8-NEXT:    s_waitcnt vmcnt(0)
674; GFX8-NEXT:    ; return to shader part epilog
675;
676; GFX900-LABEL: atomic_and_i32_1d:
677; GFX900:       ; %bb.0: ; %main_body
678; GFX900-NEXT:    s_mov_b32 s0, s2
679; GFX900-NEXT:    s_mov_b32 s1, s3
680; GFX900-NEXT:    s_mov_b32 s2, s4
681; GFX900-NEXT:    s_mov_b32 s3, s5
682; GFX900-NEXT:    s_mov_b32 s4, s6
683; GFX900-NEXT:    s_mov_b32 s5, s7
684; GFX900-NEXT:    s_mov_b32 s6, s8
685; GFX900-NEXT:    s_mov_b32 s7, s9
686; GFX900-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
687; GFX900-NEXT:    s_waitcnt vmcnt(0)
688; GFX900-NEXT:    ; return to shader part epilog
689;
690; GFX90A-LABEL: atomic_and_i32_1d:
691; GFX90A:       ; %bb.0: ; %main_body
692; GFX90A-NEXT:    s_mov_b32 s0, s2
693; GFX90A-NEXT:    s_mov_b32 s1, s3
694; GFX90A-NEXT:    s_mov_b32 s2, s4
695; GFX90A-NEXT:    s_mov_b32 s3, s5
696; GFX90A-NEXT:    s_mov_b32 s4, s6
697; GFX90A-NEXT:    s_mov_b32 s5, s7
698; GFX90A-NEXT:    s_mov_b32 s6, s8
699; GFX90A-NEXT:    s_mov_b32 s7, s9
700; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
701; GFX90A-NEXT:    image_atomic_and v0, v2, s[0:7] dmask:0x1 unorm glc
702; GFX90A-NEXT:    s_waitcnt vmcnt(0)
703; GFX90A-NEXT:    ; return to shader part epilog
704;
705; GFX10PLUS-LABEL: atomic_and_i32_1d:
706; GFX10PLUS:       ; %bb.0: ; %main_body
707; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
708; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
709; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
710; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
711; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
712; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
713; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
714; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
715; GFX10PLUS-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
716; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
717; GFX10PLUS-NEXT:    ; return to shader part epilog
718;
719; GFX12-LABEL: atomic_and_i32_1d:
720; GFX12:       ; %bb.0: ; %main_body
721; GFX12-NEXT:    s_mov_b32 s0, s2
722; GFX12-NEXT:    s_mov_b32 s1, s3
723; GFX12-NEXT:    s_mov_b32 s2, s4
724; GFX12-NEXT:    s_mov_b32 s3, s5
725; GFX12-NEXT:    s_mov_b32 s4, s6
726; GFX12-NEXT:    s_mov_b32 s5, s7
727; GFX12-NEXT:    s_mov_b32 s6, s8
728; GFX12-NEXT:    s_mov_b32 s7, s9
729; GFX12-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
730; GFX12-NEXT:    s_wait_loadcnt 0x0
731; GFX12-NEXT:    ; return to shader part epilog
732main_body:
  ; NOTE(review): the two trailing i32 0 operands are presumably the
  ; texfailctrl and cachepolicy immediates - confirm against the intrinsic
  ; definition.
733  %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
734  %out = bitcast i32 %v to float
735  ret float %out
736}
737
; Image atomic or on a 1D image: calls llvm.amdgcn.image.atomic.or.1d.i32.i32
; with %data, %s and the inreg <8 x i32> %rsrc, then bitcasts the i32 result to
; float and returns it.
; Every check block expects eight s_mov_b32 copies moving s2-s9 into s0-s7, the
; range the atomic then uses as s[0:7]. The expected mnemonic is
; image_atomic_or in every check block, carrying glc, or th:TH_ATOMIC_RETURN
; in the GFX12 checks.
; The GFX90A checks additionally expect v1 to be copied into v2, which then
; replaces v1 as the second operand of the atomic.
738define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
739; GFX6-LABEL: atomic_or_i32_1d:
740; GFX6:       ; %bb.0: ; %main_body
741; GFX6-NEXT:    s_mov_b32 s0, s2
742; GFX6-NEXT:    s_mov_b32 s1, s3
743; GFX6-NEXT:    s_mov_b32 s2, s4
744; GFX6-NEXT:    s_mov_b32 s3, s5
745; GFX6-NEXT:    s_mov_b32 s4, s6
746; GFX6-NEXT:    s_mov_b32 s5, s7
747; GFX6-NEXT:    s_mov_b32 s6, s8
748; GFX6-NEXT:    s_mov_b32 s7, s9
749; GFX6-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
750; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
751; GFX6-NEXT:    ; return to shader part epilog
752;
753; GFX8-LABEL: atomic_or_i32_1d:
754; GFX8:       ; %bb.0: ; %main_body
755; GFX8-NEXT:    s_mov_b32 s0, s2
756; GFX8-NEXT:    s_mov_b32 s1, s3
757; GFX8-NEXT:    s_mov_b32 s2, s4
758; GFX8-NEXT:    s_mov_b32 s3, s5
759; GFX8-NEXT:    s_mov_b32 s4, s6
760; GFX8-NEXT:    s_mov_b32 s5, s7
761; GFX8-NEXT:    s_mov_b32 s6, s8
762; GFX8-NEXT:    s_mov_b32 s7, s9
763; GFX8-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
764; GFX8-NEXT:    s_waitcnt vmcnt(0)
765; GFX8-NEXT:    ; return to shader part epilog
766;
767; GFX900-LABEL: atomic_or_i32_1d:
768; GFX900:       ; %bb.0: ; %main_body
769; GFX900-NEXT:    s_mov_b32 s0, s2
770; GFX900-NEXT:    s_mov_b32 s1, s3
771; GFX900-NEXT:    s_mov_b32 s2, s4
772; GFX900-NEXT:    s_mov_b32 s3, s5
773; GFX900-NEXT:    s_mov_b32 s4, s6
774; GFX900-NEXT:    s_mov_b32 s5, s7
775; GFX900-NEXT:    s_mov_b32 s6, s8
776; GFX900-NEXT:    s_mov_b32 s7, s9
777; GFX900-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
778; GFX900-NEXT:    s_waitcnt vmcnt(0)
779; GFX900-NEXT:    ; return to shader part epilog
780;
781; GFX90A-LABEL: atomic_or_i32_1d:
782; GFX90A:       ; %bb.0: ; %main_body
783; GFX90A-NEXT:    s_mov_b32 s0, s2
784; GFX90A-NEXT:    s_mov_b32 s1, s3
785; GFX90A-NEXT:    s_mov_b32 s2, s4
786; GFX90A-NEXT:    s_mov_b32 s3, s5
787; GFX90A-NEXT:    s_mov_b32 s4, s6
788; GFX90A-NEXT:    s_mov_b32 s5, s7
789; GFX90A-NEXT:    s_mov_b32 s6, s8
790; GFX90A-NEXT:    s_mov_b32 s7, s9
791; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
792; GFX90A-NEXT:    image_atomic_or v0, v2, s[0:7] dmask:0x1 unorm glc
793; GFX90A-NEXT:    s_waitcnt vmcnt(0)
794; GFX90A-NEXT:    ; return to shader part epilog
795;
796; GFX10PLUS-LABEL: atomic_or_i32_1d:
797; GFX10PLUS:       ; %bb.0: ; %main_body
798; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
799; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
800; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
801; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
802; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
803; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
804; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
805; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
806; GFX10PLUS-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
807; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
808; GFX10PLUS-NEXT:    ; return to shader part epilog
809;
810; GFX12-LABEL: atomic_or_i32_1d:
811; GFX12:       ; %bb.0: ; %main_body
812; GFX12-NEXT:    s_mov_b32 s0, s2
813; GFX12-NEXT:    s_mov_b32 s1, s3
814; GFX12-NEXT:    s_mov_b32 s2, s4
815; GFX12-NEXT:    s_mov_b32 s3, s5
816; GFX12-NEXT:    s_mov_b32 s4, s6
817; GFX12-NEXT:    s_mov_b32 s5, s7
818; GFX12-NEXT:    s_mov_b32 s6, s8
819; GFX12-NEXT:    s_mov_b32 s7, s9
820; GFX12-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
821; GFX12-NEXT:    s_wait_loadcnt 0x0
822; GFX12-NEXT:    ; return to shader part epilog
823main_body:
  ; NOTE(review): the two trailing i32 0 operands are presumably the
  ; texfailctrl and cachepolicy immediates - confirm against the intrinsic
  ; definition.
824  %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
825  %out = bitcast i32 %v to float
826  ret float %out
827}
828
; Image atomic XOR on a 1D image; the returned i32 is bitcast to float and used
; as the shader result. The intrinsic receives %data, the coordinate %s, the
; inreg <8 x i32> %rsrc, and two trailing i32 0 immediates. The expected
; instruction carries glc on every target before gfx1200 and th:TH_ATOMIC_RETURN
; on gfx1200; the gfx90a expectations additionally copy v1 into v2 first.
829define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
830; GFX6-LABEL: atomic_xor_i32_1d:
831; GFX6:       ; %bb.0: ; %main_body
832; GFX6-NEXT:    s_mov_b32 s0, s2
833; GFX6-NEXT:    s_mov_b32 s1, s3
834; GFX6-NEXT:    s_mov_b32 s2, s4
835; GFX6-NEXT:    s_mov_b32 s3, s5
836; GFX6-NEXT:    s_mov_b32 s4, s6
837; GFX6-NEXT:    s_mov_b32 s5, s7
838; GFX6-NEXT:    s_mov_b32 s6, s8
839; GFX6-NEXT:    s_mov_b32 s7, s9
840; GFX6-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
841; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
842; GFX6-NEXT:    ; return to shader part epilog
843;
844; GFX8-LABEL: atomic_xor_i32_1d:
845; GFX8:       ; %bb.0: ; %main_body
846; GFX8-NEXT:    s_mov_b32 s0, s2
847; GFX8-NEXT:    s_mov_b32 s1, s3
848; GFX8-NEXT:    s_mov_b32 s2, s4
849; GFX8-NEXT:    s_mov_b32 s3, s5
850; GFX8-NEXT:    s_mov_b32 s4, s6
851; GFX8-NEXT:    s_mov_b32 s5, s7
852; GFX8-NEXT:    s_mov_b32 s6, s8
853; GFX8-NEXT:    s_mov_b32 s7, s9
854; GFX8-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
855; GFX8-NEXT:    s_waitcnt vmcnt(0)
856; GFX8-NEXT:    ; return to shader part epilog
857;
858; GFX900-LABEL: atomic_xor_i32_1d:
859; GFX900:       ; %bb.0: ; %main_body
860; GFX900-NEXT:    s_mov_b32 s0, s2
861; GFX900-NEXT:    s_mov_b32 s1, s3
862; GFX900-NEXT:    s_mov_b32 s2, s4
863; GFX900-NEXT:    s_mov_b32 s3, s5
864; GFX900-NEXT:    s_mov_b32 s4, s6
865; GFX900-NEXT:    s_mov_b32 s5, s7
866; GFX900-NEXT:    s_mov_b32 s6, s8
867; GFX900-NEXT:    s_mov_b32 s7, s9
868; GFX900-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
869; GFX900-NEXT:    s_waitcnt vmcnt(0)
870; GFX900-NEXT:    ; return to shader part epilog
871;
872; GFX90A-LABEL: atomic_xor_i32_1d:
873; GFX90A:       ; %bb.0: ; %main_body
874; GFX90A-NEXT:    s_mov_b32 s0, s2
875; GFX90A-NEXT:    s_mov_b32 s1, s3
876; GFX90A-NEXT:    s_mov_b32 s2, s4
877; GFX90A-NEXT:    s_mov_b32 s3, s5
878; GFX90A-NEXT:    s_mov_b32 s4, s6
879; GFX90A-NEXT:    s_mov_b32 s5, s7
880; GFX90A-NEXT:    s_mov_b32 s6, s8
881; GFX90A-NEXT:    s_mov_b32 s7, s9
882; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
883; GFX90A-NEXT:    image_atomic_xor v0, v2, s[0:7] dmask:0x1 unorm glc
884; GFX90A-NEXT:    s_waitcnt vmcnt(0)
885; GFX90A-NEXT:    ; return to shader part epilog
886;
887; GFX10PLUS-LABEL: atomic_xor_i32_1d:
888; GFX10PLUS:       ; %bb.0: ; %main_body
889; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
890; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
891; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
892; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
893; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
894; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
895; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
896; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
897; GFX10PLUS-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
898; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
899; GFX10PLUS-NEXT:    ; return to shader part epilog
900;
901; GFX12-LABEL: atomic_xor_i32_1d:
902; GFX12:       ; %bb.0: ; %main_body
903; GFX12-NEXT:    s_mov_b32 s0, s2
904; GFX12-NEXT:    s_mov_b32 s1, s3
905; GFX12-NEXT:    s_mov_b32 s2, s4
906; GFX12-NEXT:    s_mov_b32 s3, s5
907; GFX12-NEXT:    s_mov_b32 s4, s6
908; GFX12-NEXT:    s_mov_b32 s5, s7
909; GFX12-NEXT:    s_mov_b32 s6, s8
910; GFX12-NEXT:    s_mov_b32 s7, s9
911; GFX12-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
912; GFX12-NEXT:    s_wait_loadcnt 0x0
913; GFX12-NEXT:    ; return to shader part epilog
914main_body:
915  %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
916  %out = bitcast i32 %v to float
917  ret float %out
918}
919
; Image atomic INC on a 1D image; the returned i32 is bitcast to float and used
; as the shader result. The intrinsic receives %data, the coordinate %s, the
; inreg <8 x i32> %rsrc, and two trailing i32 0 immediates. Targets before
; gfx1200 expect image_atomic_inc with glc; gfx1200 expects the mnemonic
; image_atomic_inc_uint with th:TH_ATOMIC_RETURN. The gfx90a expectations
; additionally copy v1 into v2 before the atomic.
920define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
921; GFX6-LABEL: atomic_inc_i32_1d:
922; GFX6:       ; %bb.0: ; %main_body
923; GFX6-NEXT:    s_mov_b32 s0, s2
924; GFX6-NEXT:    s_mov_b32 s1, s3
925; GFX6-NEXT:    s_mov_b32 s2, s4
926; GFX6-NEXT:    s_mov_b32 s3, s5
927; GFX6-NEXT:    s_mov_b32 s4, s6
928; GFX6-NEXT:    s_mov_b32 s5, s7
929; GFX6-NEXT:    s_mov_b32 s6, s8
930; GFX6-NEXT:    s_mov_b32 s7, s9
931; GFX6-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
932; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
933; GFX6-NEXT:    ; return to shader part epilog
934;
935; GFX8-LABEL: atomic_inc_i32_1d:
936; GFX8:       ; %bb.0: ; %main_body
937; GFX8-NEXT:    s_mov_b32 s0, s2
938; GFX8-NEXT:    s_mov_b32 s1, s3
939; GFX8-NEXT:    s_mov_b32 s2, s4
940; GFX8-NEXT:    s_mov_b32 s3, s5
941; GFX8-NEXT:    s_mov_b32 s4, s6
942; GFX8-NEXT:    s_mov_b32 s5, s7
943; GFX8-NEXT:    s_mov_b32 s6, s8
944; GFX8-NEXT:    s_mov_b32 s7, s9
945; GFX8-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
946; GFX8-NEXT:    s_waitcnt vmcnt(0)
947; GFX8-NEXT:    ; return to shader part epilog
948;
949; GFX900-LABEL: atomic_inc_i32_1d:
950; GFX900:       ; %bb.0: ; %main_body
951; GFX900-NEXT:    s_mov_b32 s0, s2
952; GFX900-NEXT:    s_mov_b32 s1, s3
953; GFX900-NEXT:    s_mov_b32 s2, s4
954; GFX900-NEXT:    s_mov_b32 s3, s5
955; GFX900-NEXT:    s_mov_b32 s4, s6
956; GFX900-NEXT:    s_mov_b32 s5, s7
957; GFX900-NEXT:    s_mov_b32 s6, s8
958; GFX900-NEXT:    s_mov_b32 s7, s9
959; GFX900-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
960; GFX900-NEXT:    s_waitcnt vmcnt(0)
961; GFX900-NEXT:    ; return to shader part epilog
962;
963; GFX90A-LABEL: atomic_inc_i32_1d:
964; GFX90A:       ; %bb.0: ; %main_body
965; GFX90A-NEXT:    s_mov_b32 s0, s2
966; GFX90A-NEXT:    s_mov_b32 s1, s3
967; GFX90A-NEXT:    s_mov_b32 s2, s4
968; GFX90A-NEXT:    s_mov_b32 s3, s5
969; GFX90A-NEXT:    s_mov_b32 s4, s6
970; GFX90A-NEXT:    s_mov_b32 s5, s7
971; GFX90A-NEXT:    s_mov_b32 s6, s8
972; GFX90A-NEXT:    s_mov_b32 s7, s9
973; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
974; GFX90A-NEXT:    image_atomic_inc v0, v2, s[0:7] dmask:0x1 unorm glc
975; GFX90A-NEXT:    s_waitcnt vmcnt(0)
976; GFX90A-NEXT:    ; return to shader part epilog
977;
978; GFX10PLUS-LABEL: atomic_inc_i32_1d:
979; GFX10PLUS:       ; %bb.0: ; %main_body
980; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
981; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
982; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
983; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
984; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
985; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
986; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
987; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
988; GFX10PLUS-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
989; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
990; GFX10PLUS-NEXT:    ; return to shader part epilog
991;
992; GFX12-LABEL: atomic_inc_i32_1d:
993; GFX12:       ; %bb.0: ; %main_body
994; GFX12-NEXT:    s_mov_b32 s0, s2
995; GFX12-NEXT:    s_mov_b32 s1, s3
996; GFX12-NEXT:    s_mov_b32 s2, s4
997; GFX12-NEXT:    s_mov_b32 s3, s5
998; GFX12-NEXT:    s_mov_b32 s4, s6
999; GFX12-NEXT:    s_mov_b32 s5, s7
1000; GFX12-NEXT:    s_mov_b32 s6, s8
1001; GFX12-NEXT:    s_mov_b32 s7, s9
1002; GFX12-NEXT:    image_atomic_inc_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
1003; GFX12-NEXT:    s_wait_loadcnt 0x0
1004; GFX12-NEXT:    ; return to shader part epilog
1005main_body:
1006  %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1007  %out = bitcast i32 %v to float
1008  ret float %out
1009}
1010
; Image atomic DEC on a 1D image; the returned i32 is bitcast to float and used
; as the shader result. The intrinsic receives %data, the coordinate %s, the
; inreg <8 x i32> %rsrc, and two trailing i32 0 immediates. Targets before
; gfx1200 expect image_atomic_dec with glc; gfx1200 expects the mnemonic
; image_atomic_dec_uint with th:TH_ATOMIC_RETURN. The gfx90a expectations
; additionally copy v1 into v2 before the atomic.
1011define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
1012; GFX6-LABEL: atomic_dec_i32_1d:
1013; GFX6:       ; %bb.0: ; %main_body
1014; GFX6-NEXT:    s_mov_b32 s0, s2
1015; GFX6-NEXT:    s_mov_b32 s1, s3
1016; GFX6-NEXT:    s_mov_b32 s2, s4
1017; GFX6-NEXT:    s_mov_b32 s3, s5
1018; GFX6-NEXT:    s_mov_b32 s4, s6
1019; GFX6-NEXT:    s_mov_b32 s5, s7
1020; GFX6-NEXT:    s_mov_b32 s6, s8
1021; GFX6-NEXT:    s_mov_b32 s7, s9
1022; GFX6-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
1023; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1024; GFX6-NEXT:    ; return to shader part epilog
1025;
1026; GFX8-LABEL: atomic_dec_i32_1d:
1027; GFX8:       ; %bb.0: ; %main_body
1028; GFX8-NEXT:    s_mov_b32 s0, s2
1029; GFX8-NEXT:    s_mov_b32 s1, s3
1030; GFX8-NEXT:    s_mov_b32 s2, s4
1031; GFX8-NEXT:    s_mov_b32 s3, s5
1032; GFX8-NEXT:    s_mov_b32 s4, s6
1033; GFX8-NEXT:    s_mov_b32 s5, s7
1034; GFX8-NEXT:    s_mov_b32 s6, s8
1035; GFX8-NEXT:    s_mov_b32 s7, s9
1036; GFX8-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
1037; GFX8-NEXT:    s_waitcnt vmcnt(0)
1038; GFX8-NEXT:    ; return to shader part epilog
1039;
1040; GFX900-LABEL: atomic_dec_i32_1d:
1041; GFX900:       ; %bb.0: ; %main_body
1042; GFX900-NEXT:    s_mov_b32 s0, s2
1043; GFX900-NEXT:    s_mov_b32 s1, s3
1044; GFX900-NEXT:    s_mov_b32 s2, s4
1045; GFX900-NEXT:    s_mov_b32 s3, s5
1046; GFX900-NEXT:    s_mov_b32 s4, s6
1047; GFX900-NEXT:    s_mov_b32 s5, s7
1048; GFX900-NEXT:    s_mov_b32 s6, s8
1049; GFX900-NEXT:    s_mov_b32 s7, s9
1050; GFX900-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
1051; GFX900-NEXT:    s_waitcnt vmcnt(0)
1052; GFX900-NEXT:    ; return to shader part epilog
1053;
1054; GFX90A-LABEL: atomic_dec_i32_1d:
1055; GFX90A:       ; %bb.0: ; %main_body
1056; GFX90A-NEXT:    s_mov_b32 s0, s2
1057; GFX90A-NEXT:    s_mov_b32 s1, s3
1058; GFX90A-NEXT:    s_mov_b32 s2, s4
1059; GFX90A-NEXT:    s_mov_b32 s3, s5
1060; GFX90A-NEXT:    s_mov_b32 s4, s6
1061; GFX90A-NEXT:    s_mov_b32 s5, s7
1062; GFX90A-NEXT:    s_mov_b32 s6, s8
1063; GFX90A-NEXT:    s_mov_b32 s7, s9
1064; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
1065; GFX90A-NEXT:    image_atomic_dec v0, v2, s[0:7] dmask:0x1 unorm glc
1066; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1067; GFX90A-NEXT:    ; return to shader part epilog
1068;
1069; GFX10PLUS-LABEL: atomic_dec_i32_1d:
1070; GFX10PLUS:       ; %bb.0: ; %main_body
1071; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1072; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1073; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1074; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1075; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1076; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1077; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1078; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1079; GFX10PLUS-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
1080; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1081; GFX10PLUS-NEXT:    ; return to shader part epilog
1082;
1083; GFX12-LABEL: atomic_dec_i32_1d:
1084; GFX12:       ; %bb.0: ; %main_body
1085; GFX12-NEXT:    s_mov_b32 s0, s2
1086; GFX12-NEXT:    s_mov_b32 s1, s3
1087; GFX12-NEXT:    s_mov_b32 s2, s4
1088; GFX12-NEXT:    s_mov_b32 s3, s5
1089; GFX12-NEXT:    s_mov_b32 s4, s6
1090; GFX12-NEXT:    s_mov_b32 s5, s7
1091; GFX12-NEXT:    s_mov_b32 s6, s8
1092; GFX12-NEXT:    s_mov_b32 s7, s9
1093; GFX12-NEXT:    image_atomic_dec_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
1094; GFX12-NEXT:    s_wait_loadcnt 0x0
1095; GFX12-NEXT:    ; return to shader part epilog
1096main_body:
1097  %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1098  %out = bitcast i32 %v to float
1099  ret float %out
1100}
1101
; Image atomic compare-and-swap on a 1D image; the returned i32 is bitcast to
; float and used as the shader result. The intrinsic receives %cmp, %swap, the
; coordinate %s, the inreg <8 x i32> %rsrc, and two trailing i32 0 immediates.
; Every target expects the data operand as the register pair v[0:1] with
; dmask:0x3 and the coordinate in v2; unlike the single-data atomics in this
; file, the gfx90a expectations contain no extra v_mov copy.
1102define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
1103; GFX6-LABEL: atomic_cmpswap_i32_1d:
1104; GFX6:       ; %bb.0: ; %main_body
1105; GFX6-NEXT:    s_mov_b32 s0, s2
1106; GFX6-NEXT:    s_mov_b32 s1, s3
1107; GFX6-NEXT:    s_mov_b32 s2, s4
1108; GFX6-NEXT:    s_mov_b32 s3, s5
1109; GFX6-NEXT:    s_mov_b32 s4, s6
1110; GFX6-NEXT:    s_mov_b32 s5, s7
1111; GFX6-NEXT:    s_mov_b32 s6, s8
1112; GFX6-NEXT:    s_mov_b32 s7, s9
1113; GFX6-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1114; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1115; GFX6-NEXT:    ; return to shader part epilog
1116;
1117; GFX8-LABEL: atomic_cmpswap_i32_1d:
1118; GFX8:       ; %bb.0: ; %main_body
1119; GFX8-NEXT:    s_mov_b32 s0, s2
1120; GFX8-NEXT:    s_mov_b32 s1, s3
1121; GFX8-NEXT:    s_mov_b32 s2, s4
1122; GFX8-NEXT:    s_mov_b32 s3, s5
1123; GFX8-NEXT:    s_mov_b32 s4, s6
1124; GFX8-NEXT:    s_mov_b32 s5, s7
1125; GFX8-NEXT:    s_mov_b32 s6, s8
1126; GFX8-NEXT:    s_mov_b32 s7, s9
1127; GFX8-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1128; GFX8-NEXT:    s_waitcnt vmcnt(0)
1129; GFX8-NEXT:    ; return to shader part epilog
1130;
1131; GFX900-LABEL: atomic_cmpswap_i32_1d:
1132; GFX900:       ; %bb.0: ; %main_body
1133; GFX900-NEXT:    s_mov_b32 s0, s2
1134; GFX900-NEXT:    s_mov_b32 s1, s3
1135; GFX900-NEXT:    s_mov_b32 s2, s4
1136; GFX900-NEXT:    s_mov_b32 s3, s5
1137; GFX900-NEXT:    s_mov_b32 s4, s6
1138; GFX900-NEXT:    s_mov_b32 s5, s7
1139; GFX900-NEXT:    s_mov_b32 s6, s8
1140; GFX900-NEXT:    s_mov_b32 s7, s9
1141; GFX900-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1142; GFX900-NEXT:    s_waitcnt vmcnt(0)
1143; GFX900-NEXT:    ; return to shader part epilog
1144;
1145; GFX90A-LABEL: atomic_cmpswap_i32_1d:
1146; GFX90A:       ; %bb.0: ; %main_body
1147; GFX90A-NEXT:    s_mov_b32 s0, s2
1148; GFX90A-NEXT:    s_mov_b32 s1, s3
1149; GFX90A-NEXT:    s_mov_b32 s2, s4
1150; GFX90A-NEXT:    s_mov_b32 s3, s5
1151; GFX90A-NEXT:    s_mov_b32 s4, s6
1152; GFX90A-NEXT:    s_mov_b32 s5, s7
1153; GFX90A-NEXT:    s_mov_b32 s6, s8
1154; GFX90A-NEXT:    s_mov_b32 s7, s9
1155; GFX90A-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1156; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1157; GFX90A-NEXT:    ; return to shader part epilog
1158;
1159; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d:
1160; GFX10PLUS:       ; %bb.0: ; %main_body
1161; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1162; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1163; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1164; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1165; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1166; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1167; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1168; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1169; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1170; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1171; GFX10PLUS-NEXT:    ; return to shader part epilog
1172;
1173; GFX12-LABEL: atomic_cmpswap_i32_1d:
1174; GFX12:       ; %bb.0: ; %main_body
1175; GFX12-NEXT:    s_mov_b32 s0, s2
1176; GFX12-NEXT:    s_mov_b32 s1, s3
1177; GFX12-NEXT:    s_mov_b32 s2, s4
1178; GFX12-NEXT:    s_mov_b32 s3, s5
1179; GFX12-NEXT:    s_mov_b32 s4, s6
1180; GFX12-NEXT:    s_mov_b32 s5, s7
1181; GFX12-NEXT:    s_mov_b32 s6, s8
1182; GFX12-NEXT:    s_mov_b32 s7, s9
1183; GFX12-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
1184; GFX12-NEXT:    s_wait_loadcnt 0x0
1185; GFX12-NEXT:    ; return to shader part epilog
1186main_body:
1187  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1188  %out = bitcast i32 %v to float
1189  ret float %out
1190}
1191
; Same compare-and-swap call as atomic_cmpswap_i32_1d, but the function returns
; void and the intrinsic result %v is never used. The expected code ends in
; s_endpgm directly after the atomic, with no wait instruction in between.
; NOTE(review): the expected atomic still carries glc (th:TH_ATOMIC_RETURN on
; gfx1200) although the result is unused - presumably a no-return form is not
; selected here; confirm whether that is intended before relying on it.
1192define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
1193; GFX6-LABEL: atomic_cmpswap_i32_1d_no_return:
1194; GFX6:       ; %bb.0: ; %main_body
1195; GFX6-NEXT:    s_mov_b32 s0, s2
1196; GFX6-NEXT:    s_mov_b32 s1, s3
1197; GFX6-NEXT:    s_mov_b32 s2, s4
1198; GFX6-NEXT:    s_mov_b32 s3, s5
1199; GFX6-NEXT:    s_mov_b32 s4, s6
1200; GFX6-NEXT:    s_mov_b32 s5, s7
1201; GFX6-NEXT:    s_mov_b32 s6, s8
1202; GFX6-NEXT:    s_mov_b32 s7, s9
1203; GFX6-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1204; GFX6-NEXT:    s_endpgm
1205;
1206; GFX8-LABEL: atomic_cmpswap_i32_1d_no_return:
1207; GFX8:       ; %bb.0: ; %main_body
1208; GFX8-NEXT:    s_mov_b32 s0, s2
1209; GFX8-NEXT:    s_mov_b32 s1, s3
1210; GFX8-NEXT:    s_mov_b32 s2, s4
1211; GFX8-NEXT:    s_mov_b32 s3, s5
1212; GFX8-NEXT:    s_mov_b32 s4, s6
1213; GFX8-NEXT:    s_mov_b32 s5, s7
1214; GFX8-NEXT:    s_mov_b32 s6, s8
1215; GFX8-NEXT:    s_mov_b32 s7, s9
1216; GFX8-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1217; GFX8-NEXT:    s_endpgm
1218;
1219; GFX900-LABEL: atomic_cmpswap_i32_1d_no_return:
1220; GFX900:       ; %bb.0: ; %main_body
1221; GFX900-NEXT:    s_mov_b32 s0, s2
1222; GFX900-NEXT:    s_mov_b32 s1, s3
1223; GFX900-NEXT:    s_mov_b32 s2, s4
1224; GFX900-NEXT:    s_mov_b32 s3, s5
1225; GFX900-NEXT:    s_mov_b32 s4, s6
1226; GFX900-NEXT:    s_mov_b32 s5, s7
1227; GFX900-NEXT:    s_mov_b32 s6, s8
1228; GFX900-NEXT:    s_mov_b32 s7, s9
1229; GFX900-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1230; GFX900-NEXT:    s_endpgm
1231;
1232; GFX90A-LABEL: atomic_cmpswap_i32_1d_no_return:
1233; GFX90A:       ; %bb.0: ; %main_body
1234; GFX90A-NEXT:    s_mov_b32 s0, s2
1235; GFX90A-NEXT:    s_mov_b32 s1, s3
1236; GFX90A-NEXT:    s_mov_b32 s2, s4
1237; GFX90A-NEXT:    s_mov_b32 s3, s5
1238; GFX90A-NEXT:    s_mov_b32 s4, s6
1239; GFX90A-NEXT:    s_mov_b32 s5, s7
1240; GFX90A-NEXT:    s_mov_b32 s6, s8
1241; GFX90A-NEXT:    s_mov_b32 s7, s9
1242; GFX90A-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
1243; GFX90A-NEXT:    s_endpgm
1244;
1245; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d_no_return:
1246; GFX10PLUS:       ; %bb.0: ; %main_body
1247; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1248; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1249; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1250; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1251; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1252; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1253; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1254; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1255; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
1256; GFX10PLUS-NEXT:    s_endpgm
1257;
1258; GFX12-LABEL: atomic_cmpswap_i32_1d_no_return:
1259; GFX12:       ; %bb.0: ; %main_body
1260; GFX12-NEXT:    s_mov_b32 s0, s2
1261; GFX12-NEXT:    s_mov_b32 s1, s3
1262; GFX12-NEXT:    s_mov_b32 s2, s4
1263; GFX12-NEXT:    s_mov_b32 s3, s5
1264; GFX12-NEXT:    s_mov_b32 s4, s6
1265; GFX12-NEXT:    s_mov_b32 s5, s7
1266; GFX12-NEXT:    s_mov_b32 s6, s8
1267; GFX12-NEXT:    s_mov_b32 s7, s9
1268; GFX12-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
1269; GFX12-NEXT:    s_endpgm
1270main_body:
1271  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
1272  ret void
1273}
1274
; Image atomic ADD on a 2D image; the returned i32 is bitcast to float and used
; as the shader result. The intrinsic receives %data, the two coordinates %s and
; %t, the inreg <8 x i32> %rsrc, and two trailing i32 0 immediates. Targets
; before gfx1200 expect the coordinates as the register range v[1:2] (copied to
; v[4:5] in the gfx90a expectations); gfx1200 expects image_atomic_add_uint with
; the coordinates written as the list [v1, v2].
1275define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) {
1276; GFX6-LABEL: atomic_add_i32_2d:
1277; GFX6:       ; %bb.0: ; %main_body
1278; GFX6-NEXT:    s_mov_b32 s0, s2
1279; GFX6-NEXT:    s_mov_b32 s1, s3
1280; GFX6-NEXT:    s_mov_b32 s2, s4
1281; GFX6-NEXT:    s_mov_b32 s3, s5
1282; GFX6-NEXT:    s_mov_b32 s4, s6
1283; GFX6-NEXT:    s_mov_b32 s5, s7
1284; GFX6-NEXT:    s_mov_b32 s6, s8
1285; GFX6-NEXT:    s_mov_b32 s7, s9
1286; GFX6-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
1287; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1288; GFX6-NEXT:    ; return to shader part epilog
1289;
1290; GFX8-LABEL: atomic_add_i32_2d:
1291; GFX8:       ; %bb.0: ; %main_body
1292; GFX8-NEXT:    s_mov_b32 s0, s2
1293; GFX8-NEXT:    s_mov_b32 s1, s3
1294; GFX8-NEXT:    s_mov_b32 s2, s4
1295; GFX8-NEXT:    s_mov_b32 s3, s5
1296; GFX8-NEXT:    s_mov_b32 s4, s6
1297; GFX8-NEXT:    s_mov_b32 s5, s7
1298; GFX8-NEXT:    s_mov_b32 s6, s8
1299; GFX8-NEXT:    s_mov_b32 s7, s9
1300; GFX8-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
1301; GFX8-NEXT:    s_waitcnt vmcnt(0)
1302; GFX8-NEXT:    ; return to shader part epilog
1303;
1304; GFX900-LABEL: atomic_add_i32_2d:
1305; GFX900:       ; %bb.0: ; %main_body
1306; GFX900-NEXT:    s_mov_b32 s0, s2
1307; GFX900-NEXT:    s_mov_b32 s1, s3
1308; GFX900-NEXT:    s_mov_b32 s2, s4
1309; GFX900-NEXT:    s_mov_b32 s3, s5
1310; GFX900-NEXT:    s_mov_b32 s4, s6
1311; GFX900-NEXT:    s_mov_b32 s5, s7
1312; GFX900-NEXT:    s_mov_b32 s6, s8
1313; GFX900-NEXT:    s_mov_b32 s7, s9
1314; GFX900-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
1315; GFX900-NEXT:    s_waitcnt vmcnt(0)
1316; GFX900-NEXT:    ; return to shader part epilog
1317;
1318; GFX90A-LABEL: atomic_add_i32_2d:
1319; GFX90A:       ; %bb.0: ; %main_body
1320; GFX90A-NEXT:    s_mov_b32 s0, s2
1321; GFX90A-NEXT:    s_mov_b32 s1, s3
1322; GFX90A-NEXT:    s_mov_b32 s2, s4
1323; GFX90A-NEXT:    s_mov_b32 s3, s5
1324; GFX90A-NEXT:    s_mov_b32 s4, s6
1325; GFX90A-NEXT:    s_mov_b32 s5, s7
1326; GFX90A-NEXT:    s_mov_b32 s6, s8
1327; GFX90A-NEXT:    s_mov_b32 s7, s9
1328; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
1329; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
1330; GFX90A-NEXT:    image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc
1331; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1332; GFX90A-NEXT:    ; return to shader part epilog
1333;
1334; GFX10PLUS-LABEL: atomic_add_i32_2d:
1335; GFX10PLUS:       ; %bb.0: ; %main_body
1336; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1337; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1338; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1339; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1340; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1341; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1342; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1343; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1344; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc
1345; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1346; GFX10PLUS-NEXT:    ; return to shader part epilog
1347;
1348; GFX12-LABEL: atomic_add_i32_2d:
1349; GFX12:       ; %bb.0: ; %main_body
1350; GFX12-NEXT:    s_mov_b32 s0, s2
1351; GFX12-NEXT:    s_mov_b32 s1, s3
1352; GFX12-NEXT:    s_mov_b32 s2, s4
1353; GFX12-NEXT:    s_mov_b32 s3, s5
1354; GFX12-NEXT:    s_mov_b32 s4, s6
1355; GFX12-NEXT:    s_mov_b32 s5, s7
1356; GFX12-NEXT:    s_mov_b32 s6, s8
1357; GFX12-NEXT:    s_mov_b32 s7, s9
1358; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D th:TH_ATOMIC_RETURN
1359; GFX12-NEXT:    s_wait_loadcnt 0x0
1360; GFX12-NEXT:    ; return to shader part epilog
1361main_body:
1362  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
1363  %out = bitcast i32 %v to float
1364  ret float %out
1365}
1366
; Image atomic ADD on a 3D image; the returned i32 is bitcast to float and used
; as the shader result. The intrinsic receives %data, the three coordinates %s,
; %t and %r, the inreg <8 x i32> %rsrc, and two trailing i32 0 immediates.
; Targets before gfx1200 expect the coordinates as the register range v[1:3]
; (copied to v[4:6] in the gfx90a expectations); gfx1200 expects
; image_atomic_add_uint with the coordinates written as the list [v1, v2, v3].
1367define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) {
1368; GFX6-LABEL: atomic_add_i32_3d:
1369; GFX6:       ; %bb.0: ; %main_body
1370; GFX6-NEXT:    s_mov_b32 s0, s2
1371; GFX6-NEXT:    s_mov_b32 s1, s3
1372; GFX6-NEXT:    s_mov_b32 s2, s4
1373; GFX6-NEXT:    s_mov_b32 s3, s5
1374; GFX6-NEXT:    s_mov_b32 s4, s6
1375; GFX6-NEXT:    s_mov_b32 s5, s7
1376; GFX6-NEXT:    s_mov_b32 s6, s8
1377; GFX6-NEXT:    s_mov_b32 s7, s9
1378; GFX6-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1379; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1380; GFX6-NEXT:    ; return to shader part epilog
1381;
1382; GFX8-LABEL: atomic_add_i32_3d:
1383; GFX8:       ; %bb.0: ; %main_body
1384; GFX8-NEXT:    s_mov_b32 s0, s2
1385; GFX8-NEXT:    s_mov_b32 s1, s3
1386; GFX8-NEXT:    s_mov_b32 s2, s4
1387; GFX8-NEXT:    s_mov_b32 s3, s5
1388; GFX8-NEXT:    s_mov_b32 s4, s6
1389; GFX8-NEXT:    s_mov_b32 s5, s7
1390; GFX8-NEXT:    s_mov_b32 s6, s8
1391; GFX8-NEXT:    s_mov_b32 s7, s9
1392; GFX8-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1393; GFX8-NEXT:    s_waitcnt vmcnt(0)
1394; GFX8-NEXT:    ; return to shader part epilog
1395;
1396; GFX900-LABEL: atomic_add_i32_3d:
1397; GFX900:       ; %bb.0: ; %main_body
1398; GFX900-NEXT:    s_mov_b32 s0, s2
1399; GFX900-NEXT:    s_mov_b32 s1, s3
1400; GFX900-NEXT:    s_mov_b32 s2, s4
1401; GFX900-NEXT:    s_mov_b32 s3, s5
1402; GFX900-NEXT:    s_mov_b32 s4, s6
1403; GFX900-NEXT:    s_mov_b32 s5, s7
1404; GFX900-NEXT:    s_mov_b32 s6, s8
1405; GFX900-NEXT:    s_mov_b32 s7, s9
1406; GFX900-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1407; GFX900-NEXT:    s_waitcnt vmcnt(0)
1408; GFX900-NEXT:    ; return to shader part epilog
1409;
1410; GFX90A-LABEL: atomic_add_i32_3d:
1411; GFX90A:       ; %bb.0: ; %main_body
1412; GFX90A-NEXT:    s_mov_b32 s0, s2
1413; GFX90A-NEXT:    s_mov_b32 s1, s3
1414; GFX90A-NEXT:    s_mov_b32 s2, s4
1415; GFX90A-NEXT:    s_mov_b32 s3, s5
1416; GFX90A-NEXT:    s_mov_b32 s4, s6
1417; GFX90A-NEXT:    s_mov_b32 s5, s7
1418; GFX90A-NEXT:    s_mov_b32 s6, s8
1419; GFX90A-NEXT:    s_mov_b32 s7, s9
1420; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
1421; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
1422; GFX90A-NEXT:    v_mov_b32_e32 v6, v3
1423; GFX90A-NEXT:    image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc
1424; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1425; GFX90A-NEXT:    ; return to shader part epilog
1426;
1427; GFX10PLUS-LABEL: atomic_add_i32_3d:
1428; GFX10PLUS:       ; %bb.0: ; %main_body
1429; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1430; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1431; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1432; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1433; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1434; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1435; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1436; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1437; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc
1438; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1439; GFX10PLUS-NEXT:    ; return to shader part epilog
1440;
1441; GFX12-LABEL: atomic_add_i32_3d:
1442; GFX12:       ; %bb.0: ; %main_body
1443; GFX12-NEXT:    s_mov_b32 s0, s2
1444; GFX12-NEXT:    s_mov_b32 s1, s3
1445; GFX12-NEXT:    s_mov_b32 s2, s4
1446; GFX12-NEXT:    s_mov_b32 s3, s5
1447; GFX12-NEXT:    s_mov_b32 s4, s6
1448; GFX12-NEXT:    s_mov_b32 s5, s7
1449; GFX12-NEXT:    s_mov_b32 s6, s8
1450; GFX12-NEXT:    s_mov_b32 s7, s9
1451; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D th:TH_ATOMIC_RETURN
1452; GFX12-NEXT:    s_wait_loadcnt 0x0
1453; GFX12-NEXT:    ; return to shader part epilog
1454main_body:
1455  %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
1456  %out = bitcast i32 %v to float
1457  ret float %out
1458}
1459
; Image atomic ADD on a cube image; the returned i32 is bitcast to float and
; used as the shader result. The intrinsic receives %data, the coordinates %s
; and %t, the %face index, the inreg <8 x i32> %rsrc, and two trailing i32 0
; immediates. The tahiti, fiji, gfx900 and gfx90a expectations add the da
; modifier to the instruction; the gfx1010/gfx1100 and gfx1200 expectations use
; dim:SQ_RSRC_IMG_CUBE instead. gfx90a copies v[1:3] to v[4:6] first, and
; gfx1200 expects image_atomic_add_uint with the list [v1, v2, v3].
1460define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) {
1461; GFX6-LABEL: atomic_add_i32_cube:
1462; GFX6:       ; %bb.0: ; %main_body
1463; GFX6-NEXT:    s_mov_b32 s0, s2
1464; GFX6-NEXT:    s_mov_b32 s1, s3
1465; GFX6-NEXT:    s_mov_b32 s2, s4
1466; GFX6-NEXT:    s_mov_b32 s3, s5
1467; GFX6-NEXT:    s_mov_b32 s4, s6
1468; GFX6-NEXT:    s_mov_b32 s5, s7
1469; GFX6-NEXT:    s_mov_b32 s6, s8
1470; GFX6-NEXT:    s_mov_b32 s7, s9
1471; GFX6-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1472; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1473; GFX6-NEXT:    ; return to shader part epilog
1474;
1475; GFX8-LABEL: atomic_add_i32_cube:
1476; GFX8:       ; %bb.0: ; %main_body
1477; GFX8-NEXT:    s_mov_b32 s0, s2
1478; GFX8-NEXT:    s_mov_b32 s1, s3
1479; GFX8-NEXT:    s_mov_b32 s2, s4
1480; GFX8-NEXT:    s_mov_b32 s3, s5
1481; GFX8-NEXT:    s_mov_b32 s4, s6
1482; GFX8-NEXT:    s_mov_b32 s5, s7
1483; GFX8-NEXT:    s_mov_b32 s6, s8
1484; GFX8-NEXT:    s_mov_b32 s7, s9
1485; GFX8-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1486; GFX8-NEXT:    s_waitcnt vmcnt(0)
1487; GFX8-NEXT:    ; return to shader part epilog
1488;
1489; GFX900-LABEL: atomic_add_i32_cube:
1490; GFX900:       ; %bb.0: ; %main_body
1491; GFX900-NEXT:    s_mov_b32 s0, s2
1492; GFX900-NEXT:    s_mov_b32 s1, s3
1493; GFX900-NEXT:    s_mov_b32 s2, s4
1494; GFX900-NEXT:    s_mov_b32 s3, s5
1495; GFX900-NEXT:    s_mov_b32 s4, s6
1496; GFX900-NEXT:    s_mov_b32 s5, s7
1497; GFX900-NEXT:    s_mov_b32 s6, s8
1498; GFX900-NEXT:    s_mov_b32 s7, s9
1499; GFX900-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1500; GFX900-NEXT:    s_waitcnt vmcnt(0)
1501; GFX900-NEXT:    ; return to shader part epilog
1502;
1503; GFX90A-LABEL: atomic_add_i32_cube:
1504; GFX90A:       ; %bb.0: ; %main_body
1505; GFX90A-NEXT:    s_mov_b32 s0, s2
1506; GFX90A-NEXT:    s_mov_b32 s1, s3
1507; GFX90A-NEXT:    s_mov_b32 s2, s4
1508; GFX90A-NEXT:    s_mov_b32 s3, s5
1509; GFX90A-NEXT:    s_mov_b32 s4, s6
1510; GFX90A-NEXT:    s_mov_b32 s5, s7
1511; GFX90A-NEXT:    s_mov_b32 s6, s8
1512; GFX90A-NEXT:    s_mov_b32 s7, s9
1513; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
1514; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
1515; GFX90A-NEXT:    v_mov_b32_e32 v6, v3
1516; GFX90A-NEXT:    image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da
1517; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1518; GFX90A-NEXT:    ; return to shader part epilog
1519;
1520; GFX10PLUS-LABEL: atomic_add_i32_cube:
1521; GFX10PLUS:       ; %bb.0: ; %main_body
1522; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1523; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1524; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1525; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1526; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1527; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1528; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1529; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1530; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc
1531; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1532; GFX10PLUS-NEXT:    ; return to shader part epilog
1533;
1534; GFX12-LABEL: atomic_add_i32_cube:
1535; GFX12:       ; %bb.0: ; %main_body
1536; GFX12-NEXT:    s_mov_b32 s0, s2
1537; GFX12-NEXT:    s_mov_b32 s1, s3
1538; GFX12-NEXT:    s_mov_b32 s2, s4
1539; GFX12-NEXT:    s_mov_b32 s3, s5
1540; GFX12-NEXT:    s_mov_b32 s4, s6
1541; GFX12-NEXT:    s_mov_b32 s5, s7
1542; GFX12-NEXT:    s_mov_b32 s6, s8
1543; GFX12-NEXT:    s_mov_b32 s7, s9
1544; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE th:TH_ATOMIC_RETURN
1545; GFX12-NEXT:    s_wait_loadcnt 0x0
1546; GFX12-NEXT:    ; return to shader part epilog
1547main_body:
1548  %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
1549  %out = bitcast i32 %v to float
1550  ret float %out
1551}
1552
; Image atomic add on a 1darray image whose i32 result is used: the intrinsic
; receives %data, then %s and %slice, then the <8 x i32> resource %rsrc.
; Every check prefix expects a single atomic instruction that takes %s/%slice
; as a two-register operand (v[1:2], or [v1, v2] on GFX12). The pre-GFX10
; prefixes expect the `da` modifier, while GFX10PLUS and GFX12 expect
; dim:SQ_RSRC_IMG_1D_ARRAY instead. The GFX90A checks additionally expect
; v1/v2 to be copied into v[4:5] before the atomic.
1553define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) {
1554; GFX6-LABEL: atomic_add_i32_1darray:
1555; GFX6:       ; %bb.0: ; %main_body
1556; GFX6-NEXT:    s_mov_b32 s0, s2
1557; GFX6-NEXT:    s_mov_b32 s1, s3
1558; GFX6-NEXT:    s_mov_b32 s2, s4
1559; GFX6-NEXT:    s_mov_b32 s3, s5
1560; GFX6-NEXT:    s_mov_b32 s4, s6
1561; GFX6-NEXT:    s_mov_b32 s5, s7
1562; GFX6-NEXT:    s_mov_b32 s6, s8
1563; GFX6-NEXT:    s_mov_b32 s7, s9
1564; GFX6-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
1565; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1566; GFX6-NEXT:    ; return to shader part epilog
1567;
1568; GFX8-LABEL: atomic_add_i32_1darray:
1569; GFX8:       ; %bb.0: ; %main_body
1570; GFX8-NEXT:    s_mov_b32 s0, s2
1571; GFX8-NEXT:    s_mov_b32 s1, s3
1572; GFX8-NEXT:    s_mov_b32 s2, s4
1573; GFX8-NEXT:    s_mov_b32 s3, s5
1574; GFX8-NEXT:    s_mov_b32 s4, s6
1575; GFX8-NEXT:    s_mov_b32 s5, s7
1576; GFX8-NEXT:    s_mov_b32 s6, s8
1577; GFX8-NEXT:    s_mov_b32 s7, s9
1578; GFX8-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
1579; GFX8-NEXT:    s_waitcnt vmcnt(0)
1580; GFX8-NEXT:    ; return to shader part epilog
1581;
1582; GFX900-LABEL: atomic_add_i32_1darray:
1583; GFX900:       ; %bb.0: ; %main_body
1584; GFX900-NEXT:    s_mov_b32 s0, s2
1585; GFX900-NEXT:    s_mov_b32 s1, s3
1586; GFX900-NEXT:    s_mov_b32 s2, s4
1587; GFX900-NEXT:    s_mov_b32 s3, s5
1588; GFX900-NEXT:    s_mov_b32 s4, s6
1589; GFX900-NEXT:    s_mov_b32 s5, s7
1590; GFX900-NEXT:    s_mov_b32 s6, s8
1591; GFX900-NEXT:    s_mov_b32 s7, s9
1592; GFX900-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
1593; GFX900-NEXT:    s_waitcnt vmcnt(0)
1594; GFX900-NEXT:    ; return to shader part epilog
1595;
1596; GFX90A-LABEL: atomic_add_i32_1darray:
1597; GFX90A:       ; %bb.0: ; %main_body
1598; GFX90A-NEXT:    s_mov_b32 s0, s2
1599; GFX90A-NEXT:    s_mov_b32 s1, s3
1600; GFX90A-NEXT:    s_mov_b32 s2, s4
1601; GFX90A-NEXT:    s_mov_b32 s3, s5
1602; GFX90A-NEXT:    s_mov_b32 s4, s6
1603; GFX90A-NEXT:    s_mov_b32 s5, s7
1604; GFX90A-NEXT:    s_mov_b32 s6, s8
1605; GFX90A-NEXT:    s_mov_b32 s7, s9
1606; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
1607; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
1608; GFX90A-NEXT:    image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc da
1609; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1610; GFX90A-NEXT:    ; return to shader part epilog
1611;
1612; GFX10PLUS-LABEL: atomic_add_i32_1darray:
1613; GFX10PLUS:       ; %bb.0: ; %main_body
1614; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1615; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1616; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1617; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1618; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1619; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1620; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1621; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1622; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc
1623; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1624; GFX10PLUS-NEXT:    ; return to shader part epilog
1625;
1626; GFX12-LABEL: atomic_add_i32_1darray:
1627; GFX12:       ; %bb.0: ; %main_body
1628; GFX12-NEXT:    s_mov_b32 s0, s2
1629; GFX12-NEXT:    s_mov_b32 s1, s3
1630; GFX12-NEXT:    s_mov_b32 s2, s4
1631; GFX12-NEXT:    s_mov_b32 s3, s5
1632; GFX12-NEXT:    s_mov_b32 s4, s6
1633; GFX12-NEXT:    s_mov_b32 s5, s7
1634; GFX12-NEXT:    s_mov_b32 s6, s8
1635; GFX12-NEXT:    s_mov_b32 s7, s9
1636; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY th:TH_ATOMIC_RETURN
1637; GFX12-NEXT:    s_wait_loadcnt 0x0
1638; GFX12-NEXT:    ; return to shader part epilog
1639main_body:
1640  %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ; The function returns float, so the i32 result bits are reinterpreted.
1641  %out = bitcast i32 %v to float
1642  ret float %out
1643}
1644
; Image atomic add on a 2darray image whose i32 result is used: the intrinsic
; receives %data, then %s, %t and %slice, then the <8 x i32> resource %rsrc.
; Every check prefix expects a single atomic instruction that takes the three
; values as one operand (v[1:3], or [v1, v2, v3] on GFX12). The pre-GFX10
; prefixes expect the `da` modifier, while GFX10PLUS and GFX12 expect
; dim:SQ_RSRC_IMG_2D_ARRAY instead. The GFX90A checks additionally expect
; v1..v3 to be copied into v[4:6] before the atomic.
1645define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) {
1646; GFX6-LABEL: atomic_add_i32_2darray:
1647; GFX6:       ; %bb.0: ; %main_body
1648; GFX6-NEXT:    s_mov_b32 s0, s2
1649; GFX6-NEXT:    s_mov_b32 s1, s3
1650; GFX6-NEXT:    s_mov_b32 s2, s4
1651; GFX6-NEXT:    s_mov_b32 s3, s5
1652; GFX6-NEXT:    s_mov_b32 s4, s6
1653; GFX6-NEXT:    s_mov_b32 s5, s7
1654; GFX6-NEXT:    s_mov_b32 s6, s8
1655; GFX6-NEXT:    s_mov_b32 s7, s9
1656; GFX6-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1657; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1658; GFX6-NEXT:    ; return to shader part epilog
1659;
1660; GFX8-LABEL: atomic_add_i32_2darray:
1661; GFX8:       ; %bb.0: ; %main_body
1662; GFX8-NEXT:    s_mov_b32 s0, s2
1663; GFX8-NEXT:    s_mov_b32 s1, s3
1664; GFX8-NEXT:    s_mov_b32 s2, s4
1665; GFX8-NEXT:    s_mov_b32 s3, s5
1666; GFX8-NEXT:    s_mov_b32 s4, s6
1667; GFX8-NEXT:    s_mov_b32 s5, s7
1668; GFX8-NEXT:    s_mov_b32 s6, s8
1669; GFX8-NEXT:    s_mov_b32 s7, s9
1670; GFX8-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1671; GFX8-NEXT:    s_waitcnt vmcnt(0)
1672; GFX8-NEXT:    ; return to shader part epilog
1673;
1674; GFX900-LABEL: atomic_add_i32_2darray:
1675; GFX900:       ; %bb.0: ; %main_body
1676; GFX900-NEXT:    s_mov_b32 s0, s2
1677; GFX900-NEXT:    s_mov_b32 s1, s3
1678; GFX900-NEXT:    s_mov_b32 s2, s4
1679; GFX900-NEXT:    s_mov_b32 s3, s5
1680; GFX900-NEXT:    s_mov_b32 s4, s6
1681; GFX900-NEXT:    s_mov_b32 s5, s7
1682; GFX900-NEXT:    s_mov_b32 s6, s8
1683; GFX900-NEXT:    s_mov_b32 s7, s9
1684; GFX900-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
1685; GFX900-NEXT:    s_waitcnt vmcnt(0)
1686; GFX900-NEXT:    ; return to shader part epilog
1687;
1688; GFX90A-LABEL: atomic_add_i32_2darray:
1689; GFX90A:       ; %bb.0: ; %main_body
1690; GFX90A-NEXT:    s_mov_b32 s0, s2
1691; GFX90A-NEXT:    s_mov_b32 s1, s3
1692; GFX90A-NEXT:    s_mov_b32 s2, s4
1693; GFX90A-NEXT:    s_mov_b32 s3, s5
1694; GFX90A-NEXT:    s_mov_b32 s4, s6
1695; GFX90A-NEXT:    s_mov_b32 s5, s7
1696; GFX90A-NEXT:    s_mov_b32 s6, s8
1697; GFX90A-NEXT:    s_mov_b32 s7, s9
1698; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
1699; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
1700; GFX90A-NEXT:    v_mov_b32_e32 v6, v3
1701; GFX90A-NEXT:    image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da
1702; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1703; GFX90A-NEXT:    ; return to shader part epilog
1704;
1705; GFX10PLUS-LABEL: atomic_add_i32_2darray:
1706; GFX10PLUS:       ; %bb.0: ; %main_body
1707; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1708; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1709; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1710; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1711; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1712; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1713; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1714; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1715; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc
1716; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1717; GFX10PLUS-NEXT:    ; return to shader part epilog
1718;
1719; GFX12-LABEL: atomic_add_i32_2darray:
1720; GFX12:       ; %bb.0: ; %main_body
1721; GFX12-NEXT:    s_mov_b32 s0, s2
1722; GFX12-NEXT:    s_mov_b32 s1, s3
1723; GFX12-NEXT:    s_mov_b32 s2, s4
1724; GFX12-NEXT:    s_mov_b32 s3, s5
1725; GFX12-NEXT:    s_mov_b32 s4, s6
1726; GFX12-NEXT:    s_mov_b32 s5, s7
1727; GFX12-NEXT:    s_mov_b32 s6, s8
1728; GFX12-NEXT:    s_mov_b32 s7, s9
1729; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY th:TH_ATOMIC_RETURN
1730; GFX12-NEXT:    s_wait_loadcnt 0x0
1731; GFX12-NEXT:    ; return to shader part epilog
1732main_body:
1733  %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ; The function returns float, so the i32 result bits are reinterpreted.
1734  %out = bitcast i32 %v to float
1735  ret float %out
1736}
1737
; Image atomic add on a 2dmsaa image whose i32 result is used: the intrinsic
; receives %data, then %s, %t and %fragid, then the <8 x i32> resource %rsrc.
; Every check prefix expects a single atomic instruction that takes the three
; values as one operand (v[1:3], or [v1, v2, v3] on GFX12). No `da` modifier
; is expected on the pre-GFX10 prefixes; GFX10PLUS and GFX12 expect
; dim:SQ_RSRC_IMG_2D_MSAA. The GFX90A checks additionally expect v1..v3 to be
; copied into v[4:6] before the atomic.
1738define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) {
1739; GFX6-LABEL: atomic_add_i32_2dmsaa:
1740; GFX6:       ; %bb.0: ; %main_body
1741; GFX6-NEXT:    s_mov_b32 s0, s2
1742; GFX6-NEXT:    s_mov_b32 s1, s3
1743; GFX6-NEXT:    s_mov_b32 s2, s4
1744; GFX6-NEXT:    s_mov_b32 s3, s5
1745; GFX6-NEXT:    s_mov_b32 s4, s6
1746; GFX6-NEXT:    s_mov_b32 s5, s7
1747; GFX6-NEXT:    s_mov_b32 s6, s8
1748; GFX6-NEXT:    s_mov_b32 s7, s9
1749; GFX6-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1750; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1751; GFX6-NEXT:    ; return to shader part epilog
1752;
1753; GFX8-LABEL: atomic_add_i32_2dmsaa:
1754; GFX8:       ; %bb.0: ; %main_body
1755; GFX8-NEXT:    s_mov_b32 s0, s2
1756; GFX8-NEXT:    s_mov_b32 s1, s3
1757; GFX8-NEXT:    s_mov_b32 s2, s4
1758; GFX8-NEXT:    s_mov_b32 s3, s5
1759; GFX8-NEXT:    s_mov_b32 s4, s6
1760; GFX8-NEXT:    s_mov_b32 s5, s7
1761; GFX8-NEXT:    s_mov_b32 s6, s8
1762; GFX8-NEXT:    s_mov_b32 s7, s9
1763; GFX8-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1764; GFX8-NEXT:    s_waitcnt vmcnt(0)
1765; GFX8-NEXT:    ; return to shader part epilog
1766;
1767; GFX900-LABEL: atomic_add_i32_2dmsaa:
1768; GFX900:       ; %bb.0: ; %main_body
1769; GFX900-NEXT:    s_mov_b32 s0, s2
1770; GFX900-NEXT:    s_mov_b32 s1, s3
1771; GFX900-NEXT:    s_mov_b32 s2, s4
1772; GFX900-NEXT:    s_mov_b32 s3, s5
1773; GFX900-NEXT:    s_mov_b32 s4, s6
1774; GFX900-NEXT:    s_mov_b32 s5, s7
1775; GFX900-NEXT:    s_mov_b32 s6, s8
1776; GFX900-NEXT:    s_mov_b32 s7, s9
1777; GFX900-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
1778; GFX900-NEXT:    s_waitcnt vmcnt(0)
1779; GFX900-NEXT:    ; return to shader part epilog
1780;
1781; GFX90A-LABEL: atomic_add_i32_2dmsaa:
1782; GFX90A:       ; %bb.0: ; %main_body
1783; GFX90A-NEXT:    s_mov_b32 s0, s2
1784; GFX90A-NEXT:    s_mov_b32 s1, s3
1785; GFX90A-NEXT:    s_mov_b32 s2, s4
1786; GFX90A-NEXT:    s_mov_b32 s3, s5
1787; GFX90A-NEXT:    s_mov_b32 s4, s6
1788; GFX90A-NEXT:    s_mov_b32 s5, s7
1789; GFX90A-NEXT:    s_mov_b32 s6, s8
1790; GFX90A-NEXT:    s_mov_b32 s7, s9
1791; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
1792; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
1793; GFX90A-NEXT:    v_mov_b32_e32 v6, v3
1794; GFX90A-NEXT:    image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc
1795; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1796; GFX90A-NEXT:    ; return to shader part epilog
1797;
1798; GFX10PLUS-LABEL: atomic_add_i32_2dmsaa:
1799; GFX10PLUS:       ; %bb.0: ; %main_body
1800; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1801; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1802; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1803; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1804; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1805; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1806; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1807; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1808; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc
1809; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1810; GFX10PLUS-NEXT:    ; return to shader part epilog
1811;
1812; GFX12-LABEL: atomic_add_i32_2dmsaa:
1813; GFX12:       ; %bb.0: ; %main_body
1814; GFX12-NEXT:    s_mov_b32 s0, s2
1815; GFX12-NEXT:    s_mov_b32 s1, s3
1816; GFX12-NEXT:    s_mov_b32 s2, s4
1817; GFX12-NEXT:    s_mov_b32 s3, s5
1818; GFX12-NEXT:    s_mov_b32 s4, s6
1819; GFX12-NEXT:    s_mov_b32 s5, s7
1820; GFX12-NEXT:    s_mov_b32 s6, s8
1821; GFX12-NEXT:    s_mov_b32 s7, s9
1822; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA th:TH_ATOMIC_RETURN
1823; GFX12-NEXT:    s_wait_loadcnt 0x0
1824; GFX12-NEXT:    ; return to shader part epilog
1825main_body:
1826  %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ; The function returns float, so the i32 result bits are reinterpreted.
1827  %out = bitcast i32 %v to float
1828  ret float %out
1829}
1830
; Image atomic add on a 2darraymsaa image whose i32 result is used: the
; intrinsic receives %data, then %s, %t, %slice and %fragid, then the
; <8 x i32> resource %rsrc.
; Every check prefix expects a single atomic instruction that takes the four
; values as one operand (v[1:4], or [v1, v2, v3, v4] on GFX12). The pre-GFX10
; prefixes expect the `da` modifier, while GFX10PLUS and GFX12 expect
; dim:SQ_RSRC_IMG_2D_MSAA_ARRAY instead. The GFX90A checks additionally
; expect v1..v4 to be copied into v[6:9] before the atomic.
1831define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
1832; GFX6-LABEL: atomic_add_i32_2darraymsaa:
1833; GFX6:       ; %bb.0: ; %main_body
1834; GFX6-NEXT:    s_mov_b32 s0, s2
1835; GFX6-NEXT:    s_mov_b32 s1, s3
1836; GFX6-NEXT:    s_mov_b32 s2, s4
1837; GFX6-NEXT:    s_mov_b32 s3, s5
1838; GFX6-NEXT:    s_mov_b32 s4, s6
1839; GFX6-NEXT:    s_mov_b32 s5, s7
1840; GFX6-NEXT:    s_mov_b32 s6, s8
1841; GFX6-NEXT:    s_mov_b32 s7, s9
1842; GFX6-NEXT:    image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
1843; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1844; GFX6-NEXT:    ; return to shader part epilog
1845;
1846; GFX8-LABEL: atomic_add_i32_2darraymsaa:
1847; GFX8:       ; %bb.0: ; %main_body
1848; GFX8-NEXT:    s_mov_b32 s0, s2
1849; GFX8-NEXT:    s_mov_b32 s1, s3
1850; GFX8-NEXT:    s_mov_b32 s2, s4
1851; GFX8-NEXT:    s_mov_b32 s3, s5
1852; GFX8-NEXT:    s_mov_b32 s4, s6
1853; GFX8-NEXT:    s_mov_b32 s5, s7
1854; GFX8-NEXT:    s_mov_b32 s6, s8
1855; GFX8-NEXT:    s_mov_b32 s7, s9
1856; GFX8-NEXT:    image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
1857; GFX8-NEXT:    s_waitcnt vmcnt(0)
1858; GFX8-NEXT:    ; return to shader part epilog
1859;
1860; GFX900-LABEL: atomic_add_i32_2darraymsaa:
1861; GFX900:       ; %bb.0: ; %main_body
1862; GFX900-NEXT:    s_mov_b32 s0, s2
1863; GFX900-NEXT:    s_mov_b32 s1, s3
1864; GFX900-NEXT:    s_mov_b32 s2, s4
1865; GFX900-NEXT:    s_mov_b32 s3, s5
1866; GFX900-NEXT:    s_mov_b32 s4, s6
1867; GFX900-NEXT:    s_mov_b32 s5, s7
1868; GFX900-NEXT:    s_mov_b32 s6, s8
1869; GFX900-NEXT:    s_mov_b32 s7, s9
1870; GFX900-NEXT:    image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
1871; GFX900-NEXT:    s_waitcnt vmcnt(0)
1872; GFX900-NEXT:    ; return to shader part epilog
1873;
1874; GFX90A-LABEL: atomic_add_i32_2darraymsaa:
1875; GFX90A:       ; %bb.0: ; %main_body
1876; GFX90A-NEXT:    s_mov_b32 s0, s2
1877; GFX90A-NEXT:    s_mov_b32 s1, s3
1878; GFX90A-NEXT:    s_mov_b32 s2, s4
1879; GFX90A-NEXT:    s_mov_b32 s3, s5
1880; GFX90A-NEXT:    s_mov_b32 s4, s6
1881; GFX90A-NEXT:    s_mov_b32 s5, s7
1882; GFX90A-NEXT:    s_mov_b32 s6, s8
1883; GFX90A-NEXT:    s_mov_b32 s7, s9
1884; GFX90A-NEXT:    v_mov_b32_e32 v6, v1
1885; GFX90A-NEXT:    v_mov_b32_e32 v7, v2
1886; GFX90A-NEXT:    v_mov_b32_e32 v8, v3
1887; GFX90A-NEXT:    v_mov_b32_e32 v9, v4
1888; GFX90A-NEXT:    image_atomic_add v0, v[6:9], s[0:7] dmask:0x1 unorm glc da
1889; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1890; GFX90A-NEXT:    ; return to shader part epilog
1891;
1892; GFX10PLUS-LABEL: atomic_add_i32_2darraymsaa:
1893; GFX10PLUS:       ; %bb.0: ; %main_body
1894; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1895; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1896; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1897; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1898; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1899; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1900; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1901; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1902; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
1903; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1904; GFX10PLUS-NEXT:    ; return to shader part epilog
1905;
1906; GFX12-LABEL: atomic_add_i32_2darraymsaa:
1907; GFX12:       ; %bb.0: ; %main_body
1908; GFX12-NEXT:    s_mov_b32 s0, s2
1909; GFX12-NEXT:    s_mov_b32 s1, s3
1910; GFX12-NEXT:    s_mov_b32 s2, s4
1911; GFX12-NEXT:    s_mov_b32 s3, s5
1912; GFX12-NEXT:    s_mov_b32 s4, s6
1913; GFX12-NEXT:    s_mov_b32 s5, s7
1914; GFX12-NEXT:    s_mov_b32 s6, s8
1915; GFX12-NEXT:    s_mov_b32 s7, s9
1916; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2, v3, v4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY th:TH_ATOMIC_RETURN
1917; GFX12-NEXT:    s_wait_loadcnt 0x0
1918; GFX12-NEXT:    ; return to shader part epilog
1919main_body:
1920  %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ; The function returns float, so the i32 result bits are reinterpreted.
1921  %out = bitcast i32 %v to float
1922  ret float %out
1923}
1924
; Image atomic add on a 1d image where the final i32 operand of the intrinsic
; call is 2. The checks expect the `slc` modifier next to `glc` on every
; prefix up to GFX10PLUS, and th:TH_ATOMIC_NT_RETURN on GFX12.
; NOTE(review): presumably that final operand is the cache-policy bitfield and
; value 2 selects slc - confirm against the intrinsic definition.
; The GFX90A checks additionally expect v1 to be copied into v2 before the
; atomic.
1925define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
1926; GFX6-LABEL: atomic_add_i32_1d_slc:
1927; GFX6:       ; %bb.0: ; %main_body
1928; GFX6-NEXT:    s_mov_b32 s0, s2
1929; GFX6-NEXT:    s_mov_b32 s1, s3
1930; GFX6-NEXT:    s_mov_b32 s2, s4
1931; GFX6-NEXT:    s_mov_b32 s3, s5
1932; GFX6-NEXT:    s_mov_b32 s4, s6
1933; GFX6-NEXT:    s_mov_b32 s5, s7
1934; GFX6-NEXT:    s_mov_b32 s6, s8
1935; GFX6-NEXT:    s_mov_b32 s7, s9
1936; GFX6-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1937; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1938; GFX6-NEXT:    ; return to shader part epilog
1939;
1940; GFX8-LABEL: atomic_add_i32_1d_slc:
1941; GFX8:       ; %bb.0: ; %main_body
1942; GFX8-NEXT:    s_mov_b32 s0, s2
1943; GFX8-NEXT:    s_mov_b32 s1, s3
1944; GFX8-NEXT:    s_mov_b32 s2, s4
1945; GFX8-NEXT:    s_mov_b32 s3, s5
1946; GFX8-NEXT:    s_mov_b32 s4, s6
1947; GFX8-NEXT:    s_mov_b32 s5, s7
1948; GFX8-NEXT:    s_mov_b32 s6, s8
1949; GFX8-NEXT:    s_mov_b32 s7, s9
1950; GFX8-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1951; GFX8-NEXT:    s_waitcnt vmcnt(0)
1952; GFX8-NEXT:    ; return to shader part epilog
1953;
1954; GFX900-LABEL: atomic_add_i32_1d_slc:
1955; GFX900:       ; %bb.0: ; %main_body
1956; GFX900-NEXT:    s_mov_b32 s0, s2
1957; GFX900-NEXT:    s_mov_b32 s1, s3
1958; GFX900-NEXT:    s_mov_b32 s2, s4
1959; GFX900-NEXT:    s_mov_b32 s3, s5
1960; GFX900-NEXT:    s_mov_b32 s4, s6
1961; GFX900-NEXT:    s_mov_b32 s5, s7
1962; GFX900-NEXT:    s_mov_b32 s6, s8
1963; GFX900-NEXT:    s_mov_b32 s7, s9
1964; GFX900-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
1965; GFX900-NEXT:    s_waitcnt vmcnt(0)
1966; GFX900-NEXT:    ; return to shader part epilog
1967;
1968; GFX90A-LABEL: atomic_add_i32_1d_slc:
1969; GFX90A:       ; %bb.0: ; %main_body
1970; GFX90A-NEXT:    s_mov_b32 s0, s2
1971; GFX90A-NEXT:    s_mov_b32 s1, s3
1972; GFX90A-NEXT:    s_mov_b32 s2, s4
1973; GFX90A-NEXT:    s_mov_b32 s3, s5
1974; GFX90A-NEXT:    s_mov_b32 s4, s6
1975; GFX90A-NEXT:    s_mov_b32 s5, s7
1976; GFX90A-NEXT:    s_mov_b32 s6, s8
1977; GFX90A-NEXT:    s_mov_b32 s7, s9
1978; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
1979; GFX90A-NEXT:    image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc slc
1980; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1981; GFX90A-NEXT:    ; return to shader part epilog
1982;
1983; GFX10PLUS-LABEL: atomic_add_i32_1d_slc:
1984; GFX10PLUS:       ; %bb.0: ; %main_body
1985; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1986; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1987; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1988; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1989; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1990; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1991; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1992; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1993; GFX10PLUS-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc
1994; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1995; GFX10PLUS-NEXT:    ; return to shader part epilog
1996;
1997; GFX12-LABEL: atomic_add_i32_1d_slc:
1998; GFX12:       ; %bb.0: ; %main_body
1999; GFX12-NEXT:    s_mov_b32 s0, s2
2000; GFX12-NEXT:    s_mov_b32 s1, s3
2001; GFX12-NEXT:    s_mov_b32 s2, s4
2002; GFX12-NEXT:    s_mov_b32 s3, s5
2003; GFX12-NEXT:    s_mov_b32 s4, s6
2004; GFX12-NEXT:    s_mov_b32 s5, s7
2005; GFX12-NEXT:    s_mov_b32 s6, s8
2006; GFX12-NEXT:    s_mov_b32 s7, s9
2007; GFX12-NEXT:    image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN
2008; GFX12-NEXT:    s_wait_loadcnt 0x0
2009; GFX12-NEXT:    ; return to shader part epilog
2010main_body:
2011  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ; The function returns float, so the i32 result bits are reinterpreted.
2012  %out = bitcast i32 %v to float
2013  ret float %out
2014}
2015
; 64-bit image atomic swap on a 1d image: %data is an i64 and the i64 result
; is returned as <2 x float>.
; Every check prefix expects a single image_atomic_swap whose first operand is
; the register pair v[0:1] with dmask:0x3, followed by v2 and the resource in
; s[0:7]. Unlike the i32 tests in this file, the GFX90A checks expect no
; v_mov copies before the atomic.
2016define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2017; GFX6-LABEL: atomic_swap_i64_1d:
2018; GFX6:       ; %bb.0: ; %main_body
2019; GFX6-NEXT:    s_mov_b32 s0, s2
2020; GFX6-NEXT:    s_mov_b32 s1, s3
2021; GFX6-NEXT:    s_mov_b32 s2, s4
2022; GFX6-NEXT:    s_mov_b32 s3, s5
2023; GFX6-NEXT:    s_mov_b32 s4, s6
2024; GFX6-NEXT:    s_mov_b32 s5, s7
2025; GFX6-NEXT:    s_mov_b32 s6, s8
2026; GFX6-NEXT:    s_mov_b32 s7, s9
2027; GFX6-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2028; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2029; GFX6-NEXT:    ; return to shader part epilog
2030;
2031; GFX8-LABEL: atomic_swap_i64_1d:
2032; GFX8:       ; %bb.0: ; %main_body
2033; GFX8-NEXT:    s_mov_b32 s0, s2
2034; GFX8-NEXT:    s_mov_b32 s1, s3
2035; GFX8-NEXT:    s_mov_b32 s2, s4
2036; GFX8-NEXT:    s_mov_b32 s3, s5
2037; GFX8-NEXT:    s_mov_b32 s4, s6
2038; GFX8-NEXT:    s_mov_b32 s5, s7
2039; GFX8-NEXT:    s_mov_b32 s6, s8
2040; GFX8-NEXT:    s_mov_b32 s7, s9
2041; GFX8-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2042; GFX8-NEXT:    s_waitcnt vmcnt(0)
2043; GFX8-NEXT:    ; return to shader part epilog
2044;
2045; GFX900-LABEL: atomic_swap_i64_1d:
2046; GFX900:       ; %bb.0: ; %main_body
2047; GFX900-NEXT:    s_mov_b32 s0, s2
2048; GFX900-NEXT:    s_mov_b32 s1, s3
2049; GFX900-NEXT:    s_mov_b32 s2, s4
2050; GFX900-NEXT:    s_mov_b32 s3, s5
2051; GFX900-NEXT:    s_mov_b32 s4, s6
2052; GFX900-NEXT:    s_mov_b32 s5, s7
2053; GFX900-NEXT:    s_mov_b32 s6, s8
2054; GFX900-NEXT:    s_mov_b32 s7, s9
2055; GFX900-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2056; GFX900-NEXT:    s_waitcnt vmcnt(0)
2057; GFX900-NEXT:    ; return to shader part epilog
2058;
2059; GFX90A-LABEL: atomic_swap_i64_1d:
2060; GFX90A:       ; %bb.0: ; %main_body
2061; GFX90A-NEXT:    s_mov_b32 s0, s2
2062; GFX90A-NEXT:    s_mov_b32 s1, s3
2063; GFX90A-NEXT:    s_mov_b32 s2, s4
2064; GFX90A-NEXT:    s_mov_b32 s3, s5
2065; GFX90A-NEXT:    s_mov_b32 s4, s6
2066; GFX90A-NEXT:    s_mov_b32 s5, s7
2067; GFX90A-NEXT:    s_mov_b32 s6, s8
2068; GFX90A-NEXT:    s_mov_b32 s7, s9
2069; GFX90A-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2070; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2071; GFX90A-NEXT:    ; return to shader part epilog
2072;
2073; GFX10PLUS-LABEL: atomic_swap_i64_1d:
2074; GFX10PLUS:       ; %bb.0: ; %main_body
2075; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2076; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2077; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2078; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2079; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2080; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2081; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2082; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2083; GFX10PLUS-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2084; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2085; GFX10PLUS-NEXT:    ; return to shader part epilog
2086;
2087; GFX12-LABEL: atomic_swap_i64_1d:
2088; GFX12:       ; %bb.0: ; %main_body
2089; GFX12-NEXT:    s_mov_b32 s0, s2
2090; GFX12-NEXT:    s_mov_b32 s1, s3
2091; GFX12-NEXT:    s_mov_b32 s2, s4
2092; GFX12-NEXT:    s_mov_b32 s3, s5
2093; GFX12-NEXT:    s_mov_b32 s4, s6
2094; GFX12-NEXT:    s_mov_b32 s5, s7
2095; GFX12-NEXT:    s_mov_b32 s6, s8
2096; GFX12-NEXT:    s_mov_b32 s7, s9
2097; GFX12-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2098; GFX12-NEXT:    s_wait_loadcnt 0x0
2099; GFX12-NEXT:    ; return to shader part epilog
2100main_body:
2101  %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; The function returns <2 x float>, so the i64 result bits are reinterpreted.
2102  %out = bitcast i64 %v to <2 x float>
2103  ret <2 x float> %out
2104}
2105
; 64-bit image atomic add on a 1d image: %data is an i64 and the i64 result is
; returned as <2 x float>.
; Every check prefix expects a single atomic instruction whose first operand
; is the register pair v[0:1] with dmask:0x3, followed by v2 and the resource
; in s[0:7]. The mnemonic is image_atomic_add everywhere except GFX12, where
; image_atomic_add_uint is expected. The GFX90A checks expect no v_mov copies
; before the atomic.
2106define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2107; GFX6-LABEL: atomic_add_i64_1d:
2108; GFX6:       ; %bb.0: ; %main_body
2109; GFX6-NEXT:    s_mov_b32 s0, s2
2110; GFX6-NEXT:    s_mov_b32 s1, s3
2111; GFX6-NEXT:    s_mov_b32 s2, s4
2112; GFX6-NEXT:    s_mov_b32 s3, s5
2113; GFX6-NEXT:    s_mov_b32 s4, s6
2114; GFX6-NEXT:    s_mov_b32 s5, s7
2115; GFX6-NEXT:    s_mov_b32 s6, s8
2116; GFX6-NEXT:    s_mov_b32 s7, s9
2117; GFX6-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2118; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2119; GFX6-NEXT:    ; return to shader part epilog
2120;
2121; GFX8-LABEL: atomic_add_i64_1d:
2122; GFX8:       ; %bb.0: ; %main_body
2123; GFX8-NEXT:    s_mov_b32 s0, s2
2124; GFX8-NEXT:    s_mov_b32 s1, s3
2125; GFX8-NEXT:    s_mov_b32 s2, s4
2126; GFX8-NEXT:    s_mov_b32 s3, s5
2127; GFX8-NEXT:    s_mov_b32 s4, s6
2128; GFX8-NEXT:    s_mov_b32 s5, s7
2129; GFX8-NEXT:    s_mov_b32 s6, s8
2130; GFX8-NEXT:    s_mov_b32 s7, s9
2131; GFX8-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2132; GFX8-NEXT:    s_waitcnt vmcnt(0)
2133; GFX8-NEXT:    ; return to shader part epilog
2134;
2135; GFX900-LABEL: atomic_add_i64_1d:
2136; GFX900:       ; %bb.0: ; %main_body
2137; GFX900-NEXT:    s_mov_b32 s0, s2
2138; GFX900-NEXT:    s_mov_b32 s1, s3
2139; GFX900-NEXT:    s_mov_b32 s2, s4
2140; GFX900-NEXT:    s_mov_b32 s3, s5
2141; GFX900-NEXT:    s_mov_b32 s4, s6
2142; GFX900-NEXT:    s_mov_b32 s5, s7
2143; GFX900-NEXT:    s_mov_b32 s6, s8
2144; GFX900-NEXT:    s_mov_b32 s7, s9
2145; GFX900-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2146; GFX900-NEXT:    s_waitcnt vmcnt(0)
2147; GFX900-NEXT:    ; return to shader part epilog
2148;
2149; GFX90A-LABEL: atomic_add_i64_1d:
2150; GFX90A:       ; %bb.0: ; %main_body
2151; GFX90A-NEXT:    s_mov_b32 s0, s2
2152; GFX90A-NEXT:    s_mov_b32 s1, s3
2153; GFX90A-NEXT:    s_mov_b32 s2, s4
2154; GFX90A-NEXT:    s_mov_b32 s3, s5
2155; GFX90A-NEXT:    s_mov_b32 s4, s6
2156; GFX90A-NEXT:    s_mov_b32 s5, s7
2157; GFX90A-NEXT:    s_mov_b32 s6, s8
2158; GFX90A-NEXT:    s_mov_b32 s7, s9
2159; GFX90A-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2160; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2161; GFX90A-NEXT:    ; return to shader part epilog
2162;
2163; GFX10PLUS-LABEL: atomic_add_i64_1d:
2164; GFX10PLUS:       ; %bb.0: ; %main_body
2165; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2166; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2167; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2168; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2169; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2170; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2171; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2172; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2173; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2174; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2175; GFX10PLUS-NEXT:    ; return to shader part epilog
2176;
2177; GFX12-LABEL: atomic_add_i64_1d:
2178; GFX12:       ; %bb.0: ; %main_body
2179; GFX12-NEXT:    s_mov_b32 s0, s2
2180; GFX12-NEXT:    s_mov_b32 s1, s3
2181; GFX12-NEXT:    s_mov_b32 s2, s4
2182; GFX12-NEXT:    s_mov_b32 s3, s5
2183; GFX12-NEXT:    s_mov_b32 s4, s6
2184; GFX12-NEXT:    s_mov_b32 s5, s7
2185; GFX12-NEXT:    s_mov_b32 s6, s8
2186; GFX12-NEXT:    s_mov_b32 s7, s9
2187; GFX12-NEXT:    image_atomic_add_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2188; GFX12-NEXT:    s_wait_loadcnt 0x0
2189; GFX12-NEXT:    ; return to shader part epilog
2190main_body:
2191  %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; The function returns <2 x float>, so the i64 result bits are reinterpreted.
2192  %out = bitcast i64 %v to <2 x float>
2193  ret <2 x float> %out
2194}
2195
; 64-bit image atomic sub on a 1d image: %data is an i64 and the i64 result is
; returned as <2 x float>.
; Every check prefix expects a single atomic instruction whose first operand
; is the register pair v[0:1] with dmask:0x3, followed by v2 and the resource
; in s[0:7]. The mnemonic is image_atomic_sub everywhere except GFX12, where
; image_atomic_sub_uint is expected. The GFX90A checks expect no v_mov copies
; before the atomic.
2196define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2197; GFX6-LABEL: atomic_sub_i64_1d:
2198; GFX6:       ; %bb.0: ; %main_body
2199; GFX6-NEXT:    s_mov_b32 s0, s2
2200; GFX6-NEXT:    s_mov_b32 s1, s3
2201; GFX6-NEXT:    s_mov_b32 s2, s4
2202; GFX6-NEXT:    s_mov_b32 s3, s5
2203; GFX6-NEXT:    s_mov_b32 s4, s6
2204; GFX6-NEXT:    s_mov_b32 s5, s7
2205; GFX6-NEXT:    s_mov_b32 s6, s8
2206; GFX6-NEXT:    s_mov_b32 s7, s9
2207; GFX6-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2208; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2209; GFX6-NEXT:    ; return to shader part epilog
2210;
2211; GFX8-LABEL: atomic_sub_i64_1d:
2212; GFX8:       ; %bb.0: ; %main_body
2213; GFX8-NEXT:    s_mov_b32 s0, s2
2214; GFX8-NEXT:    s_mov_b32 s1, s3
2215; GFX8-NEXT:    s_mov_b32 s2, s4
2216; GFX8-NEXT:    s_mov_b32 s3, s5
2217; GFX8-NEXT:    s_mov_b32 s4, s6
2218; GFX8-NEXT:    s_mov_b32 s5, s7
2219; GFX8-NEXT:    s_mov_b32 s6, s8
2220; GFX8-NEXT:    s_mov_b32 s7, s9
2221; GFX8-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2222; GFX8-NEXT:    s_waitcnt vmcnt(0)
2223; GFX8-NEXT:    ; return to shader part epilog
2224;
2225; GFX900-LABEL: atomic_sub_i64_1d:
2226; GFX900:       ; %bb.0: ; %main_body
2227; GFX900-NEXT:    s_mov_b32 s0, s2
2228; GFX900-NEXT:    s_mov_b32 s1, s3
2229; GFX900-NEXT:    s_mov_b32 s2, s4
2230; GFX900-NEXT:    s_mov_b32 s3, s5
2231; GFX900-NEXT:    s_mov_b32 s4, s6
2232; GFX900-NEXT:    s_mov_b32 s5, s7
2233; GFX900-NEXT:    s_mov_b32 s6, s8
2234; GFX900-NEXT:    s_mov_b32 s7, s9
2235; GFX900-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2236; GFX900-NEXT:    s_waitcnt vmcnt(0)
2237; GFX900-NEXT:    ; return to shader part epilog
2238;
2239; GFX90A-LABEL: atomic_sub_i64_1d:
2240; GFX90A:       ; %bb.0: ; %main_body
2241; GFX90A-NEXT:    s_mov_b32 s0, s2
2242; GFX90A-NEXT:    s_mov_b32 s1, s3
2243; GFX90A-NEXT:    s_mov_b32 s2, s4
2244; GFX90A-NEXT:    s_mov_b32 s3, s5
2245; GFX90A-NEXT:    s_mov_b32 s4, s6
2246; GFX90A-NEXT:    s_mov_b32 s5, s7
2247; GFX90A-NEXT:    s_mov_b32 s6, s8
2248; GFX90A-NEXT:    s_mov_b32 s7, s9
2249; GFX90A-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2250; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2251; GFX90A-NEXT:    ; return to shader part epilog
2252;
2253; GFX10PLUS-LABEL: atomic_sub_i64_1d:
2254; GFX10PLUS:       ; %bb.0: ; %main_body
2255; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2256; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2257; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2258; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2259; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2260; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2261; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2262; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2263; GFX10PLUS-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2264; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2265; GFX10PLUS-NEXT:    ; return to shader part epilog
2266;
2267; GFX12-LABEL: atomic_sub_i64_1d:
2268; GFX12:       ; %bb.0: ; %main_body
2269; GFX12-NEXT:    s_mov_b32 s0, s2
2270; GFX12-NEXT:    s_mov_b32 s1, s3
2271; GFX12-NEXT:    s_mov_b32 s2, s4
2272; GFX12-NEXT:    s_mov_b32 s3, s5
2273; GFX12-NEXT:    s_mov_b32 s4, s6
2274; GFX12-NEXT:    s_mov_b32 s5, s7
2275; GFX12-NEXT:    s_mov_b32 s6, s8
2276; GFX12-NEXT:    s_mov_b32 s7, s9
2277; GFX12-NEXT:    image_atomic_sub_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2278; GFX12-NEXT:    s_wait_loadcnt 0x0
2279; GFX12-NEXT:    ; return to shader part epilog
2280main_body:
2281  %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; The function returns <2 x float>, so the i64 result bits are reinterpreted.
2282  %out = bitcast i64 %v to <2 x float>
2283  ret <2 x float> %out
2284}
2285
; 64-bit signed-min image atomic on a 1D image (i64 data operand, i32 coordinate).
; The checks expect image_atomic_smin with dmask:0x3 on the tahiti, fiji, gfx900,
; gfx90a and GFX10PLUS runs, and image_atomic_min_int on the gfx1200 run.
; NOTE(review): the two trailing i32 0 operands of the call are presumably the
; texfailctrl/cachepolicy immediates - confirm against the intrinsic definition.
2286define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2287; GFX6-LABEL: atomic_smin_i64_1d:
2288; GFX6:       ; %bb.0: ; %main_body
2289; GFX6-NEXT:    s_mov_b32 s0, s2
2290; GFX6-NEXT:    s_mov_b32 s1, s3
2291; GFX6-NEXT:    s_mov_b32 s2, s4
2292; GFX6-NEXT:    s_mov_b32 s3, s5
2293; GFX6-NEXT:    s_mov_b32 s4, s6
2294; GFX6-NEXT:    s_mov_b32 s5, s7
2295; GFX6-NEXT:    s_mov_b32 s6, s8
2296; GFX6-NEXT:    s_mov_b32 s7, s9
2297; GFX6-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2298; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2299; GFX6-NEXT:    ; return to shader part epilog
2300;
2301; GFX8-LABEL: atomic_smin_i64_1d:
2302; GFX8:       ; %bb.0: ; %main_body
2303; GFX8-NEXT:    s_mov_b32 s0, s2
2304; GFX8-NEXT:    s_mov_b32 s1, s3
2305; GFX8-NEXT:    s_mov_b32 s2, s4
2306; GFX8-NEXT:    s_mov_b32 s3, s5
2307; GFX8-NEXT:    s_mov_b32 s4, s6
2308; GFX8-NEXT:    s_mov_b32 s5, s7
2309; GFX8-NEXT:    s_mov_b32 s6, s8
2310; GFX8-NEXT:    s_mov_b32 s7, s9
2311; GFX8-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2312; GFX8-NEXT:    s_waitcnt vmcnt(0)
2313; GFX8-NEXT:    ; return to shader part epilog
2314;
2315; GFX900-LABEL: atomic_smin_i64_1d:
2316; GFX900:       ; %bb.0: ; %main_body
2317; GFX900-NEXT:    s_mov_b32 s0, s2
2318; GFX900-NEXT:    s_mov_b32 s1, s3
2319; GFX900-NEXT:    s_mov_b32 s2, s4
2320; GFX900-NEXT:    s_mov_b32 s3, s5
2321; GFX900-NEXT:    s_mov_b32 s4, s6
2322; GFX900-NEXT:    s_mov_b32 s5, s7
2323; GFX900-NEXT:    s_mov_b32 s6, s8
2324; GFX900-NEXT:    s_mov_b32 s7, s9
2325; GFX900-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2326; GFX900-NEXT:    s_waitcnt vmcnt(0)
2327; GFX900-NEXT:    ; return to shader part epilog
2328;
2329; GFX90A-LABEL: atomic_smin_i64_1d:
2330; GFX90A:       ; %bb.0: ; %main_body
2331; GFX90A-NEXT:    s_mov_b32 s0, s2
2332; GFX90A-NEXT:    s_mov_b32 s1, s3
2333; GFX90A-NEXT:    s_mov_b32 s2, s4
2334; GFX90A-NEXT:    s_mov_b32 s3, s5
2335; GFX90A-NEXT:    s_mov_b32 s4, s6
2336; GFX90A-NEXT:    s_mov_b32 s5, s7
2337; GFX90A-NEXT:    s_mov_b32 s6, s8
2338; GFX90A-NEXT:    s_mov_b32 s7, s9
2339; GFX90A-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2340; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2341; GFX90A-NEXT:    ; return to shader part epilog
2342;
2343; GFX10PLUS-LABEL: atomic_smin_i64_1d:
2344; GFX10PLUS:       ; %bb.0: ; %main_body
2345; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2346; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2347; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2348; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2349; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2350; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2351; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2352; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2353; GFX10PLUS-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2354; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2355; GFX10PLUS-NEXT:    ; return to shader part epilog
2356;
2357; GFX12-LABEL: atomic_smin_i64_1d:
2358; GFX12:       ; %bb.0: ; %main_body
2359; GFX12-NEXT:    s_mov_b32 s0, s2
2360; GFX12-NEXT:    s_mov_b32 s1, s3
2361; GFX12-NEXT:    s_mov_b32 s2, s4
2362; GFX12-NEXT:    s_mov_b32 s3, s5
2363; GFX12-NEXT:    s_mov_b32 s4, s6
2364; GFX12-NEXT:    s_mov_b32 s5, s7
2365; GFX12-NEXT:    s_mov_b32 s6, s8
2366; GFX12-NEXT:    s_mov_b32 s7, s9
2367; GFX12-NEXT:    image_atomic_min_int v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2368; GFX12-NEXT:    s_wait_loadcnt 0x0
2369; GFX12-NEXT:    ; return to shader part epilog
2370main_body:
2371  %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result of the atomic as <2 x float> for the return value.
2372  %out = bitcast i64 %v to <2 x float>
2373  ret <2 x float> %out
2374}
2375
; 64-bit unsigned-min image atomic on a 1D image (i64 data operand, i32 coordinate).
; The checks expect image_atomic_umin with dmask:0x3 on the tahiti, fiji, gfx900,
; gfx90a and GFX10PLUS runs, and image_atomic_min_uint on the gfx1200 run.
; NOTE(review): the two trailing i32 0 operands of the call are presumably the
; texfailctrl/cachepolicy immediates - confirm against the intrinsic definition.
2376define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2377; GFX6-LABEL: atomic_umin_i64_1d:
2378; GFX6:       ; %bb.0: ; %main_body
2379; GFX6-NEXT:    s_mov_b32 s0, s2
2380; GFX6-NEXT:    s_mov_b32 s1, s3
2381; GFX6-NEXT:    s_mov_b32 s2, s4
2382; GFX6-NEXT:    s_mov_b32 s3, s5
2383; GFX6-NEXT:    s_mov_b32 s4, s6
2384; GFX6-NEXT:    s_mov_b32 s5, s7
2385; GFX6-NEXT:    s_mov_b32 s6, s8
2386; GFX6-NEXT:    s_mov_b32 s7, s9
2387; GFX6-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2388; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2389; GFX6-NEXT:    ; return to shader part epilog
2390;
2391; GFX8-LABEL: atomic_umin_i64_1d:
2392; GFX8:       ; %bb.0: ; %main_body
2393; GFX8-NEXT:    s_mov_b32 s0, s2
2394; GFX8-NEXT:    s_mov_b32 s1, s3
2395; GFX8-NEXT:    s_mov_b32 s2, s4
2396; GFX8-NEXT:    s_mov_b32 s3, s5
2397; GFX8-NEXT:    s_mov_b32 s4, s6
2398; GFX8-NEXT:    s_mov_b32 s5, s7
2399; GFX8-NEXT:    s_mov_b32 s6, s8
2400; GFX8-NEXT:    s_mov_b32 s7, s9
2401; GFX8-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2402; GFX8-NEXT:    s_waitcnt vmcnt(0)
2403; GFX8-NEXT:    ; return to shader part epilog
2404;
2405; GFX900-LABEL: atomic_umin_i64_1d:
2406; GFX900:       ; %bb.0: ; %main_body
2407; GFX900-NEXT:    s_mov_b32 s0, s2
2408; GFX900-NEXT:    s_mov_b32 s1, s3
2409; GFX900-NEXT:    s_mov_b32 s2, s4
2410; GFX900-NEXT:    s_mov_b32 s3, s5
2411; GFX900-NEXT:    s_mov_b32 s4, s6
2412; GFX900-NEXT:    s_mov_b32 s5, s7
2413; GFX900-NEXT:    s_mov_b32 s6, s8
2414; GFX900-NEXT:    s_mov_b32 s7, s9
2415; GFX900-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2416; GFX900-NEXT:    s_waitcnt vmcnt(0)
2417; GFX900-NEXT:    ; return to shader part epilog
2418;
2419; GFX90A-LABEL: atomic_umin_i64_1d:
2420; GFX90A:       ; %bb.0: ; %main_body
2421; GFX90A-NEXT:    s_mov_b32 s0, s2
2422; GFX90A-NEXT:    s_mov_b32 s1, s3
2423; GFX90A-NEXT:    s_mov_b32 s2, s4
2424; GFX90A-NEXT:    s_mov_b32 s3, s5
2425; GFX90A-NEXT:    s_mov_b32 s4, s6
2426; GFX90A-NEXT:    s_mov_b32 s5, s7
2427; GFX90A-NEXT:    s_mov_b32 s6, s8
2428; GFX90A-NEXT:    s_mov_b32 s7, s9
2429; GFX90A-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2430; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2431; GFX90A-NEXT:    ; return to shader part epilog
2432;
2433; GFX10PLUS-LABEL: atomic_umin_i64_1d:
2434; GFX10PLUS:       ; %bb.0: ; %main_body
2435; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2436; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2437; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2438; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2439; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2440; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2441; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2442; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2443; GFX10PLUS-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2444; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2445; GFX10PLUS-NEXT:    ; return to shader part epilog
2446;
2447; GFX12-LABEL: atomic_umin_i64_1d:
2448; GFX12:       ; %bb.0: ; %main_body
2449; GFX12-NEXT:    s_mov_b32 s0, s2
2450; GFX12-NEXT:    s_mov_b32 s1, s3
2451; GFX12-NEXT:    s_mov_b32 s2, s4
2452; GFX12-NEXT:    s_mov_b32 s3, s5
2453; GFX12-NEXT:    s_mov_b32 s4, s6
2454; GFX12-NEXT:    s_mov_b32 s5, s7
2455; GFX12-NEXT:    s_mov_b32 s6, s8
2456; GFX12-NEXT:    s_mov_b32 s7, s9
2457; GFX12-NEXT:    image_atomic_min_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2458; GFX12-NEXT:    s_wait_loadcnt 0x0
2459; GFX12-NEXT:    ; return to shader part epilog
2460main_body:
2461  %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result of the atomic as <2 x float> for the return value.
2462  %out = bitcast i64 %v to <2 x float>
2463  ret <2 x float> %out
2464}
2465
; 64-bit signed-max image atomic on a 1D image (i64 data operand, i32 coordinate).
; The checks expect image_atomic_smax with dmask:0x3 on the tahiti, fiji, gfx900,
; gfx90a and GFX10PLUS runs, and image_atomic_max_int on the gfx1200 run.
; NOTE(review): the two trailing i32 0 operands of the call are presumably the
; texfailctrl/cachepolicy immediates - confirm against the intrinsic definition.
2466define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2467; GFX6-LABEL: atomic_smax_i64_1d:
2468; GFX6:       ; %bb.0: ; %main_body
2469; GFX6-NEXT:    s_mov_b32 s0, s2
2470; GFX6-NEXT:    s_mov_b32 s1, s3
2471; GFX6-NEXT:    s_mov_b32 s2, s4
2472; GFX6-NEXT:    s_mov_b32 s3, s5
2473; GFX6-NEXT:    s_mov_b32 s4, s6
2474; GFX6-NEXT:    s_mov_b32 s5, s7
2475; GFX6-NEXT:    s_mov_b32 s6, s8
2476; GFX6-NEXT:    s_mov_b32 s7, s9
2477; GFX6-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2478; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2479; GFX6-NEXT:    ; return to shader part epilog
2480;
2481; GFX8-LABEL: atomic_smax_i64_1d:
2482; GFX8:       ; %bb.0: ; %main_body
2483; GFX8-NEXT:    s_mov_b32 s0, s2
2484; GFX8-NEXT:    s_mov_b32 s1, s3
2485; GFX8-NEXT:    s_mov_b32 s2, s4
2486; GFX8-NEXT:    s_mov_b32 s3, s5
2487; GFX8-NEXT:    s_mov_b32 s4, s6
2488; GFX8-NEXT:    s_mov_b32 s5, s7
2489; GFX8-NEXT:    s_mov_b32 s6, s8
2490; GFX8-NEXT:    s_mov_b32 s7, s9
2491; GFX8-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2492; GFX8-NEXT:    s_waitcnt vmcnt(0)
2493; GFX8-NEXT:    ; return to shader part epilog
2494;
2495; GFX900-LABEL: atomic_smax_i64_1d:
2496; GFX900:       ; %bb.0: ; %main_body
2497; GFX900-NEXT:    s_mov_b32 s0, s2
2498; GFX900-NEXT:    s_mov_b32 s1, s3
2499; GFX900-NEXT:    s_mov_b32 s2, s4
2500; GFX900-NEXT:    s_mov_b32 s3, s5
2501; GFX900-NEXT:    s_mov_b32 s4, s6
2502; GFX900-NEXT:    s_mov_b32 s5, s7
2503; GFX900-NEXT:    s_mov_b32 s6, s8
2504; GFX900-NEXT:    s_mov_b32 s7, s9
2505; GFX900-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2506; GFX900-NEXT:    s_waitcnt vmcnt(0)
2507; GFX900-NEXT:    ; return to shader part epilog
2508;
2509; GFX90A-LABEL: atomic_smax_i64_1d:
2510; GFX90A:       ; %bb.0: ; %main_body
2511; GFX90A-NEXT:    s_mov_b32 s0, s2
2512; GFX90A-NEXT:    s_mov_b32 s1, s3
2513; GFX90A-NEXT:    s_mov_b32 s2, s4
2514; GFX90A-NEXT:    s_mov_b32 s3, s5
2515; GFX90A-NEXT:    s_mov_b32 s4, s6
2516; GFX90A-NEXT:    s_mov_b32 s5, s7
2517; GFX90A-NEXT:    s_mov_b32 s6, s8
2518; GFX90A-NEXT:    s_mov_b32 s7, s9
2519; GFX90A-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2520; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2521; GFX90A-NEXT:    ; return to shader part epilog
2522;
2523; GFX10PLUS-LABEL: atomic_smax_i64_1d:
2524; GFX10PLUS:       ; %bb.0: ; %main_body
2525; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2526; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2527; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2528; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2529; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2530; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2531; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2532; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2533; GFX10PLUS-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2534; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2535; GFX10PLUS-NEXT:    ; return to shader part epilog
2536;
2537; GFX12-LABEL: atomic_smax_i64_1d:
2538; GFX12:       ; %bb.0: ; %main_body
2539; GFX12-NEXT:    s_mov_b32 s0, s2
2540; GFX12-NEXT:    s_mov_b32 s1, s3
2541; GFX12-NEXT:    s_mov_b32 s2, s4
2542; GFX12-NEXT:    s_mov_b32 s3, s5
2543; GFX12-NEXT:    s_mov_b32 s4, s6
2544; GFX12-NEXT:    s_mov_b32 s5, s7
2545; GFX12-NEXT:    s_mov_b32 s6, s8
2546; GFX12-NEXT:    s_mov_b32 s7, s9
2547; GFX12-NEXT:    image_atomic_max_int v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2548; GFX12-NEXT:    s_wait_loadcnt 0x0
2549; GFX12-NEXT:    ; return to shader part epilog
2550main_body:
2551  %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result of the atomic as <2 x float> for the return value.
2552  %out = bitcast i64 %v to <2 x float>
2553  ret <2 x float> %out
2554}
2555
; 64-bit unsigned-max image atomic on a 1D image (i64 data operand, i32 coordinate).
; The checks expect image_atomic_umax with dmask:0x3 on the tahiti, fiji, gfx900,
; gfx90a and GFX10PLUS runs, and image_atomic_max_uint on the gfx1200 run.
; NOTE(review): the two trailing i32 0 operands of the call are presumably the
; texfailctrl/cachepolicy immediates - confirm against the intrinsic definition.
2556define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2557; GFX6-LABEL: atomic_umax_i64_1d:
2558; GFX6:       ; %bb.0: ; %main_body
2559; GFX6-NEXT:    s_mov_b32 s0, s2
2560; GFX6-NEXT:    s_mov_b32 s1, s3
2561; GFX6-NEXT:    s_mov_b32 s2, s4
2562; GFX6-NEXT:    s_mov_b32 s3, s5
2563; GFX6-NEXT:    s_mov_b32 s4, s6
2564; GFX6-NEXT:    s_mov_b32 s5, s7
2565; GFX6-NEXT:    s_mov_b32 s6, s8
2566; GFX6-NEXT:    s_mov_b32 s7, s9
2567; GFX6-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2568; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2569; GFX6-NEXT:    ; return to shader part epilog
2570;
2571; GFX8-LABEL: atomic_umax_i64_1d:
2572; GFX8:       ; %bb.0: ; %main_body
2573; GFX8-NEXT:    s_mov_b32 s0, s2
2574; GFX8-NEXT:    s_mov_b32 s1, s3
2575; GFX8-NEXT:    s_mov_b32 s2, s4
2576; GFX8-NEXT:    s_mov_b32 s3, s5
2577; GFX8-NEXT:    s_mov_b32 s4, s6
2578; GFX8-NEXT:    s_mov_b32 s5, s7
2579; GFX8-NEXT:    s_mov_b32 s6, s8
2580; GFX8-NEXT:    s_mov_b32 s7, s9
2581; GFX8-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2582; GFX8-NEXT:    s_waitcnt vmcnt(0)
2583; GFX8-NEXT:    ; return to shader part epilog
2584;
2585; GFX900-LABEL: atomic_umax_i64_1d:
2586; GFX900:       ; %bb.0: ; %main_body
2587; GFX900-NEXT:    s_mov_b32 s0, s2
2588; GFX900-NEXT:    s_mov_b32 s1, s3
2589; GFX900-NEXT:    s_mov_b32 s2, s4
2590; GFX900-NEXT:    s_mov_b32 s3, s5
2591; GFX900-NEXT:    s_mov_b32 s4, s6
2592; GFX900-NEXT:    s_mov_b32 s5, s7
2593; GFX900-NEXT:    s_mov_b32 s6, s8
2594; GFX900-NEXT:    s_mov_b32 s7, s9
2595; GFX900-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2596; GFX900-NEXT:    s_waitcnt vmcnt(0)
2597; GFX900-NEXT:    ; return to shader part epilog
2598;
2599; GFX90A-LABEL: atomic_umax_i64_1d:
2600; GFX90A:       ; %bb.0: ; %main_body
2601; GFX90A-NEXT:    s_mov_b32 s0, s2
2602; GFX90A-NEXT:    s_mov_b32 s1, s3
2603; GFX90A-NEXT:    s_mov_b32 s2, s4
2604; GFX90A-NEXT:    s_mov_b32 s3, s5
2605; GFX90A-NEXT:    s_mov_b32 s4, s6
2606; GFX90A-NEXT:    s_mov_b32 s5, s7
2607; GFX90A-NEXT:    s_mov_b32 s6, s8
2608; GFX90A-NEXT:    s_mov_b32 s7, s9
2609; GFX90A-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2610; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2611; GFX90A-NEXT:    ; return to shader part epilog
2612;
2613; GFX10PLUS-LABEL: atomic_umax_i64_1d:
2614; GFX10PLUS:       ; %bb.0: ; %main_body
2615; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2616; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2617; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2618; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2619; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2620; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2621; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2622; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2623; GFX10PLUS-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2624; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2625; GFX10PLUS-NEXT:    ; return to shader part epilog
2626;
2627; GFX12-LABEL: atomic_umax_i64_1d:
2628; GFX12:       ; %bb.0: ; %main_body
2629; GFX12-NEXT:    s_mov_b32 s0, s2
2630; GFX12-NEXT:    s_mov_b32 s1, s3
2631; GFX12-NEXT:    s_mov_b32 s2, s4
2632; GFX12-NEXT:    s_mov_b32 s3, s5
2633; GFX12-NEXT:    s_mov_b32 s4, s6
2634; GFX12-NEXT:    s_mov_b32 s5, s7
2635; GFX12-NEXT:    s_mov_b32 s6, s8
2636; GFX12-NEXT:    s_mov_b32 s7, s9
2637; GFX12-NEXT:    image_atomic_max_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2638; GFX12-NEXT:    s_wait_loadcnt 0x0
2639; GFX12-NEXT:    ; return to shader part epilog
2640main_body:
2641  %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result of the atomic as <2 x float> for the return value.
2642  %out = bitcast i64 %v to <2 x float>
2643  ret <2 x float> %out
2644}
2645
; 64-bit bitwise-AND image atomic on a 1D image (i64 data operand, i32 coordinate).
; The checks expect image_atomic_and with dmask:0x3 on every run line; the
; GFX10PLUS and gfx1200 runs additionally carry dim:SQ_RSRC_IMG_1D.
; NOTE(review): the two trailing i32 0 operands of the call are presumably the
; texfailctrl/cachepolicy immediates - confirm against the intrinsic definition.
2646define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2647; GFX6-LABEL: atomic_and_i64_1d:
2648; GFX6:       ; %bb.0: ; %main_body
2649; GFX6-NEXT:    s_mov_b32 s0, s2
2650; GFX6-NEXT:    s_mov_b32 s1, s3
2651; GFX6-NEXT:    s_mov_b32 s2, s4
2652; GFX6-NEXT:    s_mov_b32 s3, s5
2653; GFX6-NEXT:    s_mov_b32 s4, s6
2654; GFX6-NEXT:    s_mov_b32 s5, s7
2655; GFX6-NEXT:    s_mov_b32 s6, s8
2656; GFX6-NEXT:    s_mov_b32 s7, s9
2657; GFX6-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2658; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2659; GFX6-NEXT:    ; return to shader part epilog
2660;
2661; GFX8-LABEL: atomic_and_i64_1d:
2662; GFX8:       ; %bb.0: ; %main_body
2663; GFX8-NEXT:    s_mov_b32 s0, s2
2664; GFX8-NEXT:    s_mov_b32 s1, s3
2665; GFX8-NEXT:    s_mov_b32 s2, s4
2666; GFX8-NEXT:    s_mov_b32 s3, s5
2667; GFX8-NEXT:    s_mov_b32 s4, s6
2668; GFX8-NEXT:    s_mov_b32 s5, s7
2669; GFX8-NEXT:    s_mov_b32 s6, s8
2670; GFX8-NEXT:    s_mov_b32 s7, s9
2671; GFX8-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2672; GFX8-NEXT:    s_waitcnt vmcnt(0)
2673; GFX8-NEXT:    ; return to shader part epilog
2674;
2675; GFX900-LABEL: atomic_and_i64_1d:
2676; GFX900:       ; %bb.0: ; %main_body
2677; GFX900-NEXT:    s_mov_b32 s0, s2
2678; GFX900-NEXT:    s_mov_b32 s1, s3
2679; GFX900-NEXT:    s_mov_b32 s2, s4
2680; GFX900-NEXT:    s_mov_b32 s3, s5
2681; GFX900-NEXT:    s_mov_b32 s4, s6
2682; GFX900-NEXT:    s_mov_b32 s5, s7
2683; GFX900-NEXT:    s_mov_b32 s6, s8
2684; GFX900-NEXT:    s_mov_b32 s7, s9
2685; GFX900-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2686; GFX900-NEXT:    s_waitcnt vmcnt(0)
2687; GFX900-NEXT:    ; return to shader part epilog
2688;
2689; GFX90A-LABEL: atomic_and_i64_1d:
2690; GFX90A:       ; %bb.0: ; %main_body
2691; GFX90A-NEXT:    s_mov_b32 s0, s2
2692; GFX90A-NEXT:    s_mov_b32 s1, s3
2693; GFX90A-NEXT:    s_mov_b32 s2, s4
2694; GFX90A-NEXT:    s_mov_b32 s3, s5
2695; GFX90A-NEXT:    s_mov_b32 s4, s6
2696; GFX90A-NEXT:    s_mov_b32 s5, s7
2697; GFX90A-NEXT:    s_mov_b32 s6, s8
2698; GFX90A-NEXT:    s_mov_b32 s7, s9
2699; GFX90A-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2700; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2701; GFX90A-NEXT:    ; return to shader part epilog
2702;
2703; GFX10PLUS-LABEL: atomic_and_i64_1d:
2704; GFX10PLUS:       ; %bb.0: ; %main_body
2705; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2706; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2707; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2708; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2709; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2710; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2711; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2712; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2713; GFX10PLUS-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2714; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2715; GFX10PLUS-NEXT:    ; return to shader part epilog
2716;
2717; GFX12-LABEL: atomic_and_i64_1d:
2718; GFX12:       ; %bb.0: ; %main_body
2719; GFX12-NEXT:    s_mov_b32 s0, s2
2720; GFX12-NEXT:    s_mov_b32 s1, s3
2721; GFX12-NEXT:    s_mov_b32 s2, s4
2722; GFX12-NEXT:    s_mov_b32 s3, s5
2723; GFX12-NEXT:    s_mov_b32 s4, s6
2724; GFX12-NEXT:    s_mov_b32 s5, s7
2725; GFX12-NEXT:    s_mov_b32 s6, s8
2726; GFX12-NEXT:    s_mov_b32 s7, s9
2727; GFX12-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2728; GFX12-NEXT:    s_wait_loadcnt 0x0
2729; GFX12-NEXT:    ; return to shader part epilog
2730main_body:
2731  %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result of the atomic as <2 x float> for the return value.
2732  %out = bitcast i64 %v to <2 x float>
2733  ret <2 x float> %out
2734}
2735
; 64-bit bitwise-OR image atomic on a 1D image (i64 data operand, i32 coordinate).
; The checks expect image_atomic_or with dmask:0x3 on every run line; the
; GFX10PLUS and gfx1200 runs additionally carry dim:SQ_RSRC_IMG_1D.
; NOTE(review): the two trailing i32 0 operands of the call are presumably the
; texfailctrl/cachepolicy immediates - confirm against the intrinsic definition.
2736define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2737; GFX6-LABEL: atomic_or_i64_1d:
2738; GFX6:       ; %bb.0: ; %main_body
2739; GFX6-NEXT:    s_mov_b32 s0, s2
2740; GFX6-NEXT:    s_mov_b32 s1, s3
2741; GFX6-NEXT:    s_mov_b32 s2, s4
2742; GFX6-NEXT:    s_mov_b32 s3, s5
2743; GFX6-NEXT:    s_mov_b32 s4, s6
2744; GFX6-NEXT:    s_mov_b32 s5, s7
2745; GFX6-NEXT:    s_mov_b32 s6, s8
2746; GFX6-NEXT:    s_mov_b32 s7, s9
2747; GFX6-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2748; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2749; GFX6-NEXT:    ; return to shader part epilog
2750;
2751; GFX8-LABEL: atomic_or_i64_1d:
2752; GFX8:       ; %bb.0: ; %main_body
2753; GFX8-NEXT:    s_mov_b32 s0, s2
2754; GFX8-NEXT:    s_mov_b32 s1, s3
2755; GFX8-NEXT:    s_mov_b32 s2, s4
2756; GFX8-NEXT:    s_mov_b32 s3, s5
2757; GFX8-NEXT:    s_mov_b32 s4, s6
2758; GFX8-NEXT:    s_mov_b32 s5, s7
2759; GFX8-NEXT:    s_mov_b32 s6, s8
2760; GFX8-NEXT:    s_mov_b32 s7, s9
2761; GFX8-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2762; GFX8-NEXT:    s_waitcnt vmcnt(0)
2763; GFX8-NEXT:    ; return to shader part epilog
2764;
2765; GFX900-LABEL: atomic_or_i64_1d:
2766; GFX900:       ; %bb.0: ; %main_body
2767; GFX900-NEXT:    s_mov_b32 s0, s2
2768; GFX900-NEXT:    s_mov_b32 s1, s3
2769; GFX900-NEXT:    s_mov_b32 s2, s4
2770; GFX900-NEXT:    s_mov_b32 s3, s5
2771; GFX900-NEXT:    s_mov_b32 s4, s6
2772; GFX900-NEXT:    s_mov_b32 s5, s7
2773; GFX900-NEXT:    s_mov_b32 s6, s8
2774; GFX900-NEXT:    s_mov_b32 s7, s9
2775; GFX900-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2776; GFX900-NEXT:    s_waitcnt vmcnt(0)
2777; GFX900-NEXT:    ; return to shader part epilog
2778;
2779; GFX90A-LABEL: atomic_or_i64_1d:
2780; GFX90A:       ; %bb.0: ; %main_body
2781; GFX90A-NEXT:    s_mov_b32 s0, s2
2782; GFX90A-NEXT:    s_mov_b32 s1, s3
2783; GFX90A-NEXT:    s_mov_b32 s2, s4
2784; GFX90A-NEXT:    s_mov_b32 s3, s5
2785; GFX90A-NEXT:    s_mov_b32 s4, s6
2786; GFX90A-NEXT:    s_mov_b32 s5, s7
2787; GFX90A-NEXT:    s_mov_b32 s6, s8
2788; GFX90A-NEXT:    s_mov_b32 s7, s9
2789; GFX90A-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2790; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2791; GFX90A-NEXT:    ; return to shader part epilog
2792;
2793; GFX10PLUS-LABEL: atomic_or_i64_1d:
2794; GFX10PLUS:       ; %bb.0: ; %main_body
2795; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2796; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2797; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2798; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2799; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2800; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2801; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2802; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2803; GFX10PLUS-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2804; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2805; GFX10PLUS-NEXT:    ; return to shader part epilog
2806;
2807; GFX12-LABEL: atomic_or_i64_1d:
2808; GFX12:       ; %bb.0: ; %main_body
2809; GFX12-NEXT:    s_mov_b32 s0, s2
2810; GFX12-NEXT:    s_mov_b32 s1, s3
2811; GFX12-NEXT:    s_mov_b32 s2, s4
2812; GFX12-NEXT:    s_mov_b32 s3, s5
2813; GFX12-NEXT:    s_mov_b32 s4, s6
2814; GFX12-NEXT:    s_mov_b32 s5, s7
2815; GFX12-NEXT:    s_mov_b32 s6, s8
2816; GFX12-NEXT:    s_mov_b32 s7, s9
2817; GFX12-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2818; GFX12-NEXT:    s_wait_loadcnt 0x0
2819; GFX12-NEXT:    ; return to shader part epilog
2820main_body:
2821  %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result of the atomic as <2 x float> for the return value.
2822  %out = bitcast i64 %v to <2 x float>
2823  ret <2 x float> %out
2824}
2825
; 64-bit bitwise-XOR image atomic on a 1D image (i64 data operand, i32 coordinate).
; The checks expect image_atomic_xor with dmask:0x3 on every run line; the
; GFX10PLUS and gfx1200 runs additionally carry dim:SQ_RSRC_IMG_1D.
; NOTE(review): the two trailing i32 0 operands of the call are presumably the
; texfailctrl/cachepolicy immediates - confirm against the intrinsic definition.
2826define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2827; GFX6-LABEL: atomic_xor_i64_1d:
2828; GFX6:       ; %bb.0: ; %main_body
2829; GFX6-NEXT:    s_mov_b32 s0, s2
2830; GFX6-NEXT:    s_mov_b32 s1, s3
2831; GFX6-NEXT:    s_mov_b32 s2, s4
2832; GFX6-NEXT:    s_mov_b32 s3, s5
2833; GFX6-NEXT:    s_mov_b32 s4, s6
2834; GFX6-NEXT:    s_mov_b32 s5, s7
2835; GFX6-NEXT:    s_mov_b32 s6, s8
2836; GFX6-NEXT:    s_mov_b32 s7, s9
2837; GFX6-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2838; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2839; GFX6-NEXT:    ; return to shader part epilog
2840;
2841; GFX8-LABEL: atomic_xor_i64_1d:
2842; GFX8:       ; %bb.0: ; %main_body
2843; GFX8-NEXT:    s_mov_b32 s0, s2
2844; GFX8-NEXT:    s_mov_b32 s1, s3
2845; GFX8-NEXT:    s_mov_b32 s2, s4
2846; GFX8-NEXT:    s_mov_b32 s3, s5
2847; GFX8-NEXT:    s_mov_b32 s4, s6
2848; GFX8-NEXT:    s_mov_b32 s5, s7
2849; GFX8-NEXT:    s_mov_b32 s6, s8
2850; GFX8-NEXT:    s_mov_b32 s7, s9
2851; GFX8-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2852; GFX8-NEXT:    s_waitcnt vmcnt(0)
2853; GFX8-NEXT:    ; return to shader part epilog
2854;
2855; GFX900-LABEL: atomic_xor_i64_1d:
2856; GFX900:       ; %bb.0: ; %main_body
2857; GFX900-NEXT:    s_mov_b32 s0, s2
2858; GFX900-NEXT:    s_mov_b32 s1, s3
2859; GFX900-NEXT:    s_mov_b32 s2, s4
2860; GFX900-NEXT:    s_mov_b32 s3, s5
2861; GFX900-NEXT:    s_mov_b32 s4, s6
2862; GFX900-NEXT:    s_mov_b32 s5, s7
2863; GFX900-NEXT:    s_mov_b32 s6, s8
2864; GFX900-NEXT:    s_mov_b32 s7, s9
2865; GFX900-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2866; GFX900-NEXT:    s_waitcnt vmcnt(0)
2867; GFX900-NEXT:    ; return to shader part epilog
2868;
2869; GFX90A-LABEL: atomic_xor_i64_1d:
2870; GFX90A:       ; %bb.0: ; %main_body
2871; GFX90A-NEXT:    s_mov_b32 s0, s2
2872; GFX90A-NEXT:    s_mov_b32 s1, s3
2873; GFX90A-NEXT:    s_mov_b32 s2, s4
2874; GFX90A-NEXT:    s_mov_b32 s3, s5
2875; GFX90A-NEXT:    s_mov_b32 s4, s6
2876; GFX90A-NEXT:    s_mov_b32 s5, s7
2877; GFX90A-NEXT:    s_mov_b32 s6, s8
2878; GFX90A-NEXT:    s_mov_b32 s7, s9
2879; GFX90A-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2880; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2881; GFX90A-NEXT:    ; return to shader part epilog
2882;
2883; GFX10PLUS-LABEL: atomic_xor_i64_1d:
2884; GFX10PLUS:       ; %bb.0: ; %main_body
2885; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2886; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2887; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2888; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2889; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2890; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2891; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2892; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2893; GFX10PLUS-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2894; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2895; GFX10PLUS-NEXT:    ; return to shader part epilog
2896;
2897; GFX12-LABEL: atomic_xor_i64_1d:
2898; GFX12:       ; %bb.0: ; %main_body
2899; GFX12-NEXT:    s_mov_b32 s0, s2
2900; GFX12-NEXT:    s_mov_b32 s1, s3
2901; GFX12-NEXT:    s_mov_b32 s2, s4
2902; GFX12-NEXT:    s_mov_b32 s3, s5
2903; GFX12-NEXT:    s_mov_b32 s4, s6
2904; GFX12-NEXT:    s_mov_b32 s5, s7
2905; GFX12-NEXT:    s_mov_b32 s6, s8
2906; GFX12-NEXT:    s_mov_b32 s7, s9
2907; GFX12-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2908; GFX12-NEXT:    s_wait_loadcnt 0x0
2909; GFX12-NEXT:    ; return to shader part epilog
2910main_body:
2911  %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result of the atomic as <2 x float> for the return value.
2912  %out = bitcast i64 %v to <2 x float>
2913  ret <2 x float> %out
2914}
2915
; 64-bit increment image atomic on a 1D image (i64 data operand, i32 coordinate).
; The checks expect image_atomic_inc with dmask:0x3 on the tahiti, fiji, gfx900,
; gfx90a and GFX10PLUS runs, and image_atomic_inc_uint on the gfx1200 run.
; NOTE(review): the two trailing i32 0 operands of the call are presumably the
; texfailctrl/cachepolicy immediates - confirm against the intrinsic definition.
2916define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
2917; GFX6-LABEL: atomic_inc_i64_1d:
2918; GFX6:       ; %bb.0: ; %main_body
2919; GFX6-NEXT:    s_mov_b32 s0, s2
2920; GFX6-NEXT:    s_mov_b32 s1, s3
2921; GFX6-NEXT:    s_mov_b32 s2, s4
2922; GFX6-NEXT:    s_mov_b32 s3, s5
2923; GFX6-NEXT:    s_mov_b32 s4, s6
2924; GFX6-NEXT:    s_mov_b32 s5, s7
2925; GFX6-NEXT:    s_mov_b32 s6, s8
2926; GFX6-NEXT:    s_mov_b32 s7, s9
2927; GFX6-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2928; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2929; GFX6-NEXT:    ; return to shader part epilog
2930;
2931; GFX8-LABEL: atomic_inc_i64_1d:
2932; GFX8:       ; %bb.0: ; %main_body
2933; GFX8-NEXT:    s_mov_b32 s0, s2
2934; GFX8-NEXT:    s_mov_b32 s1, s3
2935; GFX8-NEXT:    s_mov_b32 s2, s4
2936; GFX8-NEXT:    s_mov_b32 s3, s5
2937; GFX8-NEXT:    s_mov_b32 s4, s6
2938; GFX8-NEXT:    s_mov_b32 s5, s7
2939; GFX8-NEXT:    s_mov_b32 s6, s8
2940; GFX8-NEXT:    s_mov_b32 s7, s9
2941; GFX8-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2942; GFX8-NEXT:    s_waitcnt vmcnt(0)
2943; GFX8-NEXT:    ; return to shader part epilog
2944;
2945; GFX900-LABEL: atomic_inc_i64_1d:
2946; GFX900:       ; %bb.0: ; %main_body
2947; GFX900-NEXT:    s_mov_b32 s0, s2
2948; GFX900-NEXT:    s_mov_b32 s1, s3
2949; GFX900-NEXT:    s_mov_b32 s2, s4
2950; GFX900-NEXT:    s_mov_b32 s3, s5
2951; GFX900-NEXT:    s_mov_b32 s4, s6
2952; GFX900-NEXT:    s_mov_b32 s5, s7
2953; GFX900-NEXT:    s_mov_b32 s6, s8
2954; GFX900-NEXT:    s_mov_b32 s7, s9
2955; GFX900-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2956; GFX900-NEXT:    s_waitcnt vmcnt(0)
2957; GFX900-NEXT:    ; return to shader part epilog
2958;
2959; GFX90A-LABEL: atomic_inc_i64_1d:
2960; GFX90A:       ; %bb.0: ; %main_body
2961; GFX90A-NEXT:    s_mov_b32 s0, s2
2962; GFX90A-NEXT:    s_mov_b32 s1, s3
2963; GFX90A-NEXT:    s_mov_b32 s2, s4
2964; GFX90A-NEXT:    s_mov_b32 s3, s5
2965; GFX90A-NEXT:    s_mov_b32 s4, s6
2966; GFX90A-NEXT:    s_mov_b32 s5, s7
2967; GFX90A-NEXT:    s_mov_b32 s6, s8
2968; GFX90A-NEXT:    s_mov_b32 s7, s9
2969; GFX90A-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
2970; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2971; GFX90A-NEXT:    ; return to shader part epilog
2972;
2973; GFX10PLUS-LABEL: atomic_inc_i64_1d:
2974; GFX10PLUS:       ; %bb.0: ; %main_body
2975; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2976; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2977; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2978; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2979; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2980; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2981; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2982; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2983; GFX10PLUS-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
2984; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2985; GFX10PLUS-NEXT:    ; return to shader part epilog
2986;
2987; GFX12-LABEL: atomic_inc_i64_1d:
2988; GFX12:       ; %bb.0: ; %main_body
2989; GFX12-NEXT:    s_mov_b32 s0, s2
2990; GFX12-NEXT:    s_mov_b32 s1, s3
2991; GFX12-NEXT:    s_mov_b32 s2, s4
2992; GFX12-NEXT:    s_mov_b32 s3, s5
2993; GFX12-NEXT:    s_mov_b32 s4, s6
2994; GFX12-NEXT:    s_mov_b32 s5, s7
2995; GFX12-NEXT:    s_mov_b32 s6, s8
2996; GFX12-NEXT:    s_mov_b32 s7, s9
2997; GFX12-NEXT:    image_atomic_inc_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
2998; GFX12-NEXT:    s_wait_loadcnt 0x0
2999; GFX12-NEXT:    ; return to shader part epilog
3000main_body:
3001  %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result of the atomic as <2 x float> for the return value.
3002  %out = bitcast i64 %v to <2 x float>
3003  ret <2 x float> %out
3004}
3005
; 64-bit decrement image atomic on a 1D image (i64 data operand, i32 coordinate).
; The checks expect image_atomic_dec with dmask:0x3 on the tahiti, fiji, gfx900,
; gfx90a and GFX10PLUS runs, and image_atomic_dec_uint on the gfx1200 run.
; NOTE(review): the two trailing i32 0 operands of the call are presumably the
; texfailctrl/cachepolicy immediates - confirm against the intrinsic definition.
3006define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
3007; GFX6-LABEL: atomic_dec_i64_1d:
3008; GFX6:       ; %bb.0: ; %main_body
3009; GFX6-NEXT:    s_mov_b32 s0, s2
3010; GFX6-NEXT:    s_mov_b32 s1, s3
3011; GFX6-NEXT:    s_mov_b32 s2, s4
3012; GFX6-NEXT:    s_mov_b32 s3, s5
3013; GFX6-NEXT:    s_mov_b32 s4, s6
3014; GFX6-NEXT:    s_mov_b32 s5, s7
3015; GFX6-NEXT:    s_mov_b32 s6, s8
3016; GFX6-NEXT:    s_mov_b32 s7, s9
3017; GFX6-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
3018; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
3019; GFX6-NEXT:    ; return to shader part epilog
3020;
3021; GFX8-LABEL: atomic_dec_i64_1d:
3022; GFX8:       ; %bb.0: ; %main_body
3023; GFX8-NEXT:    s_mov_b32 s0, s2
3024; GFX8-NEXT:    s_mov_b32 s1, s3
3025; GFX8-NEXT:    s_mov_b32 s2, s4
3026; GFX8-NEXT:    s_mov_b32 s3, s5
3027; GFX8-NEXT:    s_mov_b32 s4, s6
3028; GFX8-NEXT:    s_mov_b32 s5, s7
3029; GFX8-NEXT:    s_mov_b32 s6, s8
3030; GFX8-NEXT:    s_mov_b32 s7, s9
3031; GFX8-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
3032; GFX8-NEXT:    s_waitcnt vmcnt(0)
3033; GFX8-NEXT:    ; return to shader part epilog
3034;
3035; GFX900-LABEL: atomic_dec_i64_1d:
3036; GFX900:       ; %bb.0: ; %main_body
3037; GFX900-NEXT:    s_mov_b32 s0, s2
3038; GFX900-NEXT:    s_mov_b32 s1, s3
3039; GFX900-NEXT:    s_mov_b32 s2, s4
3040; GFX900-NEXT:    s_mov_b32 s3, s5
3041; GFX900-NEXT:    s_mov_b32 s4, s6
3042; GFX900-NEXT:    s_mov_b32 s5, s7
3043; GFX900-NEXT:    s_mov_b32 s6, s8
3044; GFX900-NEXT:    s_mov_b32 s7, s9
3045; GFX900-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
3046; GFX900-NEXT:    s_waitcnt vmcnt(0)
3047; GFX900-NEXT:    ; return to shader part epilog
3048;
3049; GFX90A-LABEL: atomic_dec_i64_1d:
3050; GFX90A:       ; %bb.0: ; %main_body
3051; GFX90A-NEXT:    s_mov_b32 s0, s2
3052; GFX90A-NEXT:    s_mov_b32 s1, s3
3053; GFX90A-NEXT:    s_mov_b32 s2, s4
3054; GFX90A-NEXT:    s_mov_b32 s3, s5
3055; GFX90A-NEXT:    s_mov_b32 s4, s6
3056; GFX90A-NEXT:    s_mov_b32 s5, s7
3057; GFX90A-NEXT:    s_mov_b32 s6, s8
3058; GFX90A-NEXT:    s_mov_b32 s7, s9
3059; GFX90A-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
3060; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3061; GFX90A-NEXT:    ; return to shader part epilog
3062;
3063; GFX10PLUS-LABEL: atomic_dec_i64_1d:
3064; GFX10PLUS:       ; %bb.0: ; %main_body
3065; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3066; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3067; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3068; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3069; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3070; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3071; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3072; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3073; GFX10PLUS-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
3074; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
3075; GFX10PLUS-NEXT:    ; return to shader part epilog
3076;
3077; GFX12-LABEL: atomic_dec_i64_1d:
3078; GFX12:       ; %bb.0: ; %main_body
3079; GFX12-NEXT:    s_mov_b32 s0, s2
3080; GFX12-NEXT:    s_mov_b32 s1, s3
3081; GFX12-NEXT:    s_mov_b32 s2, s4
3082; GFX12-NEXT:    s_mov_b32 s3, s5
3083; GFX12-NEXT:    s_mov_b32 s4, s6
3084; GFX12-NEXT:    s_mov_b32 s5, s7
3085; GFX12-NEXT:    s_mov_b32 s6, s8
3086; GFX12-NEXT:    s_mov_b32 s7, s9
3087; GFX12-NEXT:    image_atomic_dec_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
3088; GFX12-NEXT:    s_wait_loadcnt 0x0
3089; GFX12-NEXT:    ; return to shader part epilog
3090main_body:
3091  %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result of the atomic as <2 x float> for the return value.
3092  %out = bitcast i64 %v to <2 x float>
3093  ret <2 x float> %out
3094}
3095
; 64-bit image atomic compare-and-swap on a 1D image, returning the old value.
; Inputs are the i64 %cmp / %swap pair and one i32 coordinate %s; the i64
; result is bitcast to <2 x float> and returned from the shader.
; All run lines expect the inreg descriptor to be copied from s2..s9 into
; s[0:7], then a single image_atomic_cmpswap with data in v[0:3], the
; coordinate in v4 and dmask:0xf, followed by a wait before the epilog.
3096define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
3097; GFX6-LABEL: atomic_cmpswap_i64_1d:
3098; GFX6:       ; %bb.0: ; %main_body
3099; GFX6-NEXT:    s_mov_b32 s0, s2
3100; GFX6-NEXT:    s_mov_b32 s1, s3
3101; GFX6-NEXT:    s_mov_b32 s2, s4
3102; GFX6-NEXT:    s_mov_b32 s3, s5
3103; GFX6-NEXT:    s_mov_b32 s4, s6
3104; GFX6-NEXT:    s_mov_b32 s5, s7
3105; GFX6-NEXT:    s_mov_b32 s6, s8
3106; GFX6-NEXT:    s_mov_b32 s7, s9
3107; GFX6-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3108; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
3109; GFX6-NEXT:    ; return to shader part epilog
3110;
3111; GFX8-LABEL: atomic_cmpswap_i64_1d:
3112; GFX8:       ; %bb.0: ; %main_body
3113; GFX8-NEXT:    s_mov_b32 s0, s2
3114; GFX8-NEXT:    s_mov_b32 s1, s3
3115; GFX8-NEXT:    s_mov_b32 s2, s4
3116; GFX8-NEXT:    s_mov_b32 s3, s5
3117; GFX8-NEXT:    s_mov_b32 s4, s6
3118; GFX8-NEXT:    s_mov_b32 s5, s7
3119; GFX8-NEXT:    s_mov_b32 s6, s8
3120; GFX8-NEXT:    s_mov_b32 s7, s9
3121; GFX8-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3122; GFX8-NEXT:    s_waitcnt vmcnt(0)
3123; GFX8-NEXT:    ; return to shader part epilog
3124;
3125; GFX900-LABEL: atomic_cmpswap_i64_1d:
3126; GFX900:       ; %bb.0: ; %main_body
3127; GFX900-NEXT:    s_mov_b32 s0, s2
3128; GFX900-NEXT:    s_mov_b32 s1, s3
3129; GFX900-NEXT:    s_mov_b32 s2, s4
3130; GFX900-NEXT:    s_mov_b32 s3, s5
3131; GFX900-NEXT:    s_mov_b32 s4, s6
3132; GFX900-NEXT:    s_mov_b32 s5, s7
3133; GFX900-NEXT:    s_mov_b32 s6, s8
3134; GFX900-NEXT:    s_mov_b32 s7, s9
3135; GFX900-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3136; GFX900-NEXT:    s_waitcnt vmcnt(0)
3137; GFX900-NEXT:    ; return to shader part epilog
3138;
3139; GFX90A-LABEL: atomic_cmpswap_i64_1d:
3140; GFX90A:       ; %bb.0: ; %main_body
3141; GFX90A-NEXT:    s_mov_b32 s0, s2
3142; GFX90A-NEXT:    s_mov_b32 s1, s3
3143; GFX90A-NEXT:    s_mov_b32 s2, s4
3144; GFX90A-NEXT:    s_mov_b32 s3, s5
3145; GFX90A-NEXT:    s_mov_b32 s4, s6
3146; GFX90A-NEXT:    s_mov_b32 s5, s7
3147; GFX90A-NEXT:    s_mov_b32 s6, s8
3148; GFX90A-NEXT:    s_mov_b32 s7, s9
3149; GFX90A-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3150; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3151; GFX90A-NEXT:    ; return to shader part epilog
3152;
3153; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d:
3154; GFX10PLUS:       ; %bb.0: ; %main_body
3155; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3156; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3157; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3158; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3159; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3160; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3161; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3162; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3163; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
3164; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
3165; GFX10PLUS-NEXT:    ; return to shader part epilog
3166;
3167; GFX12-LABEL: atomic_cmpswap_i64_1d:
3168; GFX12:       ; %bb.0: ; %main_body
3169; GFX12-NEXT:    s_mov_b32 s0, s2
3170; GFX12-NEXT:    s_mov_b32 s1, s3
3171; GFX12-NEXT:    s_mov_b32 s2, s4
3172; GFX12-NEXT:    s_mov_b32 s3, s5
3173; GFX12-NEXT:    s_mov_b32 s4, s6
3174; GFX12-NEXT:    s_mov_b32 s5, s7
3175; GFX12-NEXT:    s_mov_b32 s6, s8
3176; GFX12-NEXT:    s_mov_b32 s7, s9
3177; GFX12-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
3178; GFX12-NEXT:    s_wait_loadcnt 0x0
3179; GFX12-NEXT:    ; return to shader part epilog
3180main_body:
3181  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
3182  %out = bitcast i64 %v to <2 x float>
3183  ret <2 x float> %out
3184}
3185
; 64-bit image atomic compare-and-swap on a 1D image whose result is unused:
; %v is never read and the function returns void.
; The expected output therefore ends in s_endpgm with no wait instruction.
; NOTE(review): the recorded assertions still carry glc (and
; th:TH_ATOMIC_RETURN on the gfx1200 run), i.e. the returning form of the
; instruction is what was captured even though the value is dropped.
3186define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
3187; GFX6-LABEL: atomic_cmpswap_i64_1d_no_return:
3188; GFX6:       ; %bb.0: ; %main_body
3189; GFX6-NEXT:    s_mov_b32 s0, s2
3190; GFX6-NEXT:    s_mov_b32 s1, s3
3191; GFX6-NEXT:    s_mov_b32 s2, s4
3192; GFX6-NEXT:    s_mov_b32 s3, s5
3193; GFX6-NEXT:    s_mov_b32 s4, s6
3194; GFX6-NEXT:    s_mov_b32 s5, s7
3195; GFX6-NEXT:    s_mov_b32 s6, s8
3196; GFX6-NEXT:    s_mov_b32 s7, s9
3197; GFX6-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3198; GFX6-NEXT:    s_endpgm
3199;
3200; GFX8-LABEL: atomic_cmpswap_i64_1d_no_return:
3201; GFX8:       ; %bb.0: ; %main_body
3202; GFX8-NEXT:    s_mov_b32 s0, s2
3203; GFX8-NEXT:    s_mov_b32 s1, s3
3204; GFX8-NEXT:    s_mov_b32 s2, s4
3205; GFX8-NEXT:    s_mov_b32 s3, s5
3206; GFX8-NEXT:    s_mov_b32 s4, s6
3207; GFX8-NEXT:    s_mov_b32 s5, s7
3208; GFX8-NEXT:    s_mov_b32 s6, s8
3209; GFX8-NEXT:    s_mov_b32 s7, s9
3210; GFX8-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3211; GFX8-NEXT:    s_endpgm
3212;
3213; GFX900-LABEL: atomic_cmpswap_i64_1d_no_return:
3214; GFX900:       ; %bb.0: ; %main_body
3215; GFX900-NEXT:    s_mov_b32 s0, s2
3216; GFX900-NEXT:    s_mov_b32 s1, s3
3217; GFX900-NEXT:    s_mov_b32 s2, s4
3218; GFX900-NEXT:    s_mov_b32 s3, s5
3219; GFX900-NEXT:    s_mov_b32 s4, s6
3220; GFX900-NEXT:    s_mov_b32 s5, s7
3221; GFX900-NEXT:    s_mov_b32 s6, s8
3222; GFX900-NEXT:    s_mov_b32 s7, s9
3223; GFX900-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3224; GFX900-NEXT:    s_endpgm
3225;
3226; GFX90A-LABEL: atomic_cmpswap_i64_1d_no_return:
3227; GFX90A:       ; %bb.0: ; %main_body
3228; GFX90A-NEXT:    s_mov_b32 s0, s2
3229; GFX90A-NEXT:    s_mov_b32 s1, s3
3230; GFX90A-NEXT:    s_mov_b32 s2, s4
3231; GFX90A-NEXT:    s_mov_b32 s3, s5
3232; GFX90A-NEXT:    s_mov_b32 s4, s6
3233; GFX90A-NEXT:    s_mov_b32 s5, s7
3234; GFX90A-NEXT:    s_mov_b32 s6, s8
3235; GFX90A-NEXT:    s_mov_b32 s7, s9
3236; GFX90A-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
3237; GFX90A-NEXT:    s_endpgm
3238;
3239; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d_no_return:
3240; GFX10PLUS:       ; %bb.0: ; %main_body
3241; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3242; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3243; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3244; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3245; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3246; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3247; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3248; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3249; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
3250; GFX10PLUS-NEXT:    s_endpgm
3251;
3252; GFX12-LABEL: atomic_cmpswap_i64_1d_no_return:
3253; GFX12:       ; %bb.0: ; %main_body
3254; GFX12-NEXT:    s_mov_b32 s0, s2
3255; GFX12-NEXT:    s_mov_b32 s1, s3
3256; GFX12-NEXT:    s_mov_b32 s2, s4
3257; GFX12-NEXT:    s_mov_b32 s3, s5
3258; GFX12-NEXT:    s_mov_b32 s4, s6
3259; GFX12-NEXT:    s_mov_b32 s5, s7
3260; GFX12-NEXT:    s_mov_b32 s6, s8
3261; GFX12-NEXT:    s_mov_b32 s7, s9
3262; GFX12-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
3263; GFX12-NEXT:    s_endpgm
3264main_body:
3265  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
3266  ret void
3267}
3268
; 64-bit image atomic add on a 2D image, returning the old value.
; Inputs are i64 %data and the two i32 coordinates %s, %t; the i64 result is
; bitcast to <2 x float> and returned.
; Expected instruction: data in v[0:1], coordinates in v[2:3] (printed as
; [v2, v3] on the gfx1200 run, where the mnemonic is image_atomic_add_uint),
; dmask:0x3, and dim:SQ_RSRC_IMG_2D on the gfx10-and-later runs.
3269define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t) {
3270; GFX6-LABEL: atomic_add_i64_2d:
3271; GFX6:       ; %bb.0: ; %main_body
3272; GFX6-NEXT:    s_mov_b32 s0, s2
3273; GFX6-NEXT:    s_mov_b32 s1, s3
3274; GFX6-NEXT:    s_mov_b32 s2, s4
3275; GFX6-NEXT:    s_mov_b32 s3, s5
3276; GFX6-NEXT:    s_mov_b32 s4, s6
3277; GFX6-NEXT:    s_mov_b32 s5, s7
3278; GFX6-NEXT:    s_mov_b32 s6, s8
3279; GFX6-NEXT:    s_mov_b32 s7, s9
3280; GFX6-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
3281; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
3282; GFX6-NEXT:    ; return to shader part epilog
3283;
3284; GFX8-LABEL: atomic_add_i64_2d:
3285; GFX8:       ; %bb.0: ; %main_body
3286; GFX8-NEXT:    s_mov_b32 s0, s2
3287; GFX8-NEXT:    s_mov_b32 s1, s3
3288; GFX8-NEXT:    s_mov_b32 s2, s4
3289; GFX8-NEXT:    s_mov_b32 s3, s5
3290; GFX8-NEXT:    s_mov_b32 s4, s6
3291; GFX8-NEXT:    s_mov_b32 s5, s7
3292; GFX8-NEXT:    s_mov_b32 s6, s8
3293; GFX8-NEXT:    s_mov_b32 s7, s9
3294; GFX8-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
3295; GFX8-NEXT:    s_waitcnt vmcnt(0)
3296; GFX8-NEXT:    ; return to shader part epilog
3297;
3298; GFX900-LABEL: atomic_add_i64_2d:
3299; GFX900:       ; %bb.0: ; %main_body
3300; GFX900-NEXT:    s_mov_b32 s0, s2
3301; GFX900-NEXT:    s_mov_b32 s1, s3
3302; GFX900-NEXT:    s_mov_b32 s2, s4
3303; GFX900-NEXT:    s_mov_b32 s3, s5
3304; GFX900-NEXT:    s_mov_b32 s4, s6
3305; GFX900-NEXT:    s_mov_b32 s5, s7
3306; GFX900-NEXT:    s_mov_b32 s6, s8
3307; GFX900-NEXT:    s_mov_b32 s7, s9
3308; GFX900-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
3309; GFX900-NEXT:    s_waitcnt vmcnt(0)
3310; GFX900-NEXT:    ; return to shader part epilog
3311;
3312; GFX90A-LABEL: atomic_add_i64_2d:
3313; GFX90A:       ; %bb.0: ; %main_body
3314; GFX90A-NEXT:    s_mov_b32 s0, s2
3315; GFX90A-NEXT:    s_mov_b32 s1, s3
3316; GFX90A-NEXT:    s_mov_b32 s2, s4
3317; GFX90A-NEXT:    s_mov_b32 s3, s5
3318; GFX90A-NEXT:    s_mov_b32 s4, s6
3319; GFX90A-NEXT:    s_mov_b32 s5, s7
3320; GFX90A-NEXT:    s_mov_b32 s6, s8
3321; GFX90A-NEXT:    s_mov_b32 s7, s9
3322; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
3323; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3324; GFX90A-NEXT:    ; return to shader part epilog
3325;
3326; GFX10PLUS-LABEL: atomic_add_i64_2d:
3327; GFX10PLUS:       ; %bb.0: ; %main_body
3328; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3329; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3330; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3331; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3332; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3333; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3334; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3335; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3336; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc
3337; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
3338; GFX10PLUS-NEXT:    ; return to shader part epilog
3339;
3340; GFX12-LABEL: atomic_add_i64_2d:
3341; GFX12:       ; %bb.0: ; %main_body
3342; GFX12-NEXT:    s_mov_b32 s0, s2
3343; GFX12-NEXT:    s_mov_b32 s1, s3
3344; GFX12-NEXT:    s_mov_b32 s2, s4
3345; GFX12-NEXT:    s_mov_b32 s3, s5
3346; GFX12-NEXT:    s_mov_b32 s4, s6
3347; GFX12-NEXT:    s_mov_b32 s5, s7
3348; GFX12-NEXT:    s_mov_b32 s6, s8
3349; GFX12-NEXT:    s_mov_b32 s7, s9
3350; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D th:TH_ATOMIC_RETURN
3351; GFX12-NEXT:    s_wait_loadcnt 0x0
3352; GFX12-NEXT:    ; return to shader part epilog
3353main_body:
3354  %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
3355  %out = bitcast i64 %v to <2 x float>
3356  ret <2 x float> %out
3357}
3358
; 64-bit image atomic add on a 3D image, returning the old value.
; Inputs are i64 %data and the three i32 coordinates %s, %t, %r; the i64
; result is bitcast to <2 x float> and returned.
; Expected instruction: data in v[0:1], coordinates in v[2:4] (printed as
; [v2, v3, v4] on the gfx1200 run), dmask:0x3, and dim:SQ_RSRC_IMG_3D on the
; gfx10-and-later runs. No da modifier is expected on the older targets.
3359define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %r) {
3360; GFX6-LABEL: atomic_add_i64_3d:
3361; GFX6:       ; %bb.0: ; %main_body
3362; GFX6-NEXT:    s_mov_b32 s0, s2
3363; GFX6-NEXT:    s_mov_b32 s1, s3
3364; GFX6-NEXT:    s_mov_b32 s2, s4
3365; GFX6-NEXT:    s_mov_b32 s3, s5
3366; GFX6-NEXT:    s_mov_b32 s4, s6
3367; GFX6-NEXT:    s_mov_b32 s5, s7
3368; GFX6-NEXT:    s_mov_b32 s6, s8
3369; GFX6-NEXT:    s_mov_b32 s7, s9
3370; GFX6-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3371; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
3372; GFX6-NEXT:    ; return to shader part epilog
3373;
3374; GFX8-LABEL: atomic_add_i64_3d:
3375; GFX8:       ; %bb.0: ; %main_body
3376; GFX8-NEXT:    s_mov_b32 s0, s2
3377; GFX8-NEXT:    s_mov_b32 s1, s3
3378; GFX8-NEXT:    s_mov_b32 s2, s4
3379; GFX8-NEXT:    s_mov_b32 s3, s5
3380; GFX8-NEXT:    s_mov_b32 s4, s6
3381; GFX8-NEXT:    s_mov_b32 s5, s7
3382; GFX8-NEXT:    s_mov_b32 s6, s8
3383; GFX8-NEXT:    s_mov_b32 s7, s9
3384; GFX8-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3385; GFX8-NEXT:    s_waitcnt vmcnt(0)
3386; GFX8-NEXT:    ; return to shader part epilog
3387;
3388; GFX900-LABEL: atomic_add_i64_3d:
3389; GFX900:       ; %bb.0: ; %main_body
3390; GFX900-NEXT:    s_mov_b32 s0, s2
3391; GFX900-NEXT:    s_mov_b32 s1, s3
3392; GFX900-NEXT:    s_mov_b32 s2, s4
3393; GFX900-NEXT:    s_mov_b32 s3, s5
3394; GFX900-NEXT:    s_mov_b32 s4, s6
3395; GFX900-NEXT:    s_mov_b32 s5, s7
3396; GFX900-NEXT:    s_mov_b32 s6, s8
3397; GFX900-NEXT:    s_mov_b32 s7, s9
3398; GFX900-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3399; GFX900-NEXT:    s_waitcnt vmcnt(0)
3400; GFX900-NEXT:    ; return to shader part epilog
3401;
3402; GFX90A-LABEL: atomic_add_i64_3d:
3403; GFX90A:       ; %bb.0: ; %main_body
3404; GFX90A-NEXT:    s_mov_b32 s0, s2
3405; GFX90A-NEXT:    s_mov_b32 s1, s3
3406; GFX90A-NEXT:    s_mov_b32 s2, s4
3407; GFX90A-NEXT:    s_mov_b32 s3, s5
3408; GFX90A-NEXT:    s_mov_b32 s4, s6
3409; GFX90A-NEXT:    s_mov_b32 s5, s7
3410; GFX90A-NEXT:    s_mov_b32 s6, s8
3411; GFX90A-NEXT:    s_mov_b32 s7, s9
3412; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3413; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3414; GFX90A-NEXT:    ; return to shader part epilog
3415;
3416; GFX10PLUS-LABEL: atomic_add_i64_3d:
3417; GFX10PLUS:       ; %bb.0: ; %main_body
3418; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3419; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3420; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3421; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3422; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3423; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3424; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3425; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3426; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc
3427; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
3428; GFX10PLUS-NEXT:    ; return to shader part epilog
3429;
3430; GFX12-LABEL: atomic_add_i64_3d:
3431; GFX12:       ; %bb.0: ; %main_body
3432; GFX12-NEXT:    s_mov_b32 s0, s2
3433; GFX12-NEXT:    s_mov_b32 s1, s3
3434; GFX12-NEXT:    s_mov_b32 s2, s4
3435; GFX12-NEXT:    s_mov_b32 s3, s5
3436; GFX12-NEXT:    s_mov_b32 s4, s6
3437; GFX12-NEXT:    s_mov_b32 s5, s7
3438; GFX12-NEXT:    s_mov_b32 s6, s8
3439; GFX12-NEXT:    s_mov_b32 s7, s9
3440; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D th:TH_ATOMIC_RETURN
3441; GFX12-NEXT:    s_wait_loadcnt 0x0
3442; GFX12-NEXT:    ; return to shader part epilog
3443main_body:
3444  %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
3445  %out = bitcast i64 %v to <2 x float>
3446  ret <2 x float> %out
3447}
3448
; 64-bit image atomic add on a cube image, returning the old value.
; Inputs are i64 %data and the i32 operands %s, %t, %face; the i64 result is
; bitcast to <2 x float> and returned.
; Expected instruction: data in v[0:1], address operands in v[2:4], dmask:0x3.
; The tahiti/fiji/gfx900/gfx90a runs expect the da modifier, while the
; gfx10-and-later runs expect dim:SQ_RSRC_IMG_CUBE instead.
3449define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %face) {
3450; GFX6-LABEL: atomic_add_i64_cube:
3451; GFX6:       ; %bb.0: ; %main_body
3452; GFX6-NEXT:    s_mov_b32 s0, s2
3453; GFX6-NEXT:    s_mov_b32 s1, s3
3454; GFX6-NEXT:    s_mov_b32 s2, s4
3455; GFX6-NEXT:    s_mov_b32 s3, s5
3456; GFX6-NEXT:    s_mov_b32 s4, s6
3457; GFX6-NEXT:    s_mov_b32 s5, s7
3458; GFX6-NEXT:    s_mov_b32 s6, s8
3459; GFX6-NEXT:    s_mov_b32 s7, s9
3460; GFX6-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3461; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
3462; GFX6-NEXT:    ; return to shader part epilog
3463;
3464; GFX8-LABEL: atomic_add_i64_cube:
3465; GFX8:       ; %bb.0: ; %main_body
3466; GFX8-NEXT:    s_mov_b32 s0, s2
3467; GFX8-NEXT:    s_mov_b32 s1, s3
3468; GFX8-NEXT:    s_mov_b32 s2, s4
3469; GFX8-NEXT:    s_mov_b32 s3, s5
3470; GFX8-NEXT:    s_mov_b32 s4, s6
3471; GFX8-NEXT:    s_mov_b32 s5, s7
3472; GFX8-NEXT:    s_mov_b32 s6, s8
3473; GFX8-NEXT:    s_mov_b32 s7, s9
3474; GFX8-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3475; GFX8-NEXT:    s_waitcnt vmcnt(0)
3476; GFX8-NEXT:    ; return to shader part epilog
3477;
3478; GFX900-LABEL: atomic_add_i64_cube:
3479; GFX900:       ; %bb.0: ; %main_body
3480; GFX900-NEXT:    s_mov_b32 s0, s2
3481; GFX900-NEXT:    s_mov_b32 s1, s3
3482; GFX900-NEXT:    s_mov_b32 s2, s4
3483; GFX900-NEXT:    s_mov_b32 s3, s5
3484; GFX900-NEXT:    s_mov_b32 s4, s6
3485; GFX900-NEXT:    s_mov_b32 s5, s7
3486; GFX900-NEXT:    s_mov_b32 s6, s8
3487; GFX900-NEXT:    s_mov_b32 s7, s9
3488; GFX900-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3489; GFX900-NEXT:    s_waitcnt vmcnt(0)
3490; GFX900-NEXT:    ; return to shader part epilog
3491;
3492; GFX90A-LABEL: atomic_add_i64_cube:
3493; GFX90A:       ; %bb.0: ; %main_body
3494; GFX90A-NEXT:    s_mov_b32 s0, s2
3495; GFX90A-NEXT:    s_mov_b32 s1, s3
3496; GFX90A-NEXT:    s_mov_b32 s2, s4
3497; GFX90A-NEXT:    s_mov_b32 s3, s5
3498; GFX90A-NEXT:    s_mov_b32 s4, s6
3499; GFX90A-NEXT:    s_mov_b32 s5, s7
3500; GFX90A-NEXT:    s_mov_b32 s6, s8
3501; GFX90A-NEXT:    s_mov_b32 s7, s9
3502; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3503; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3504; GFX90A-NEXT:    ; return to shader part epilog
3505;
3506; GFX10PLUS-LABEL: atomic_add_i64_cube:
3507; GFX10PLUS:       ; %bb.0: ; %main_body
3508; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3509; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3510; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3511; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3512; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3513; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3514; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3515; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3516; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc
3517; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
3518; GFX10PLUS-NEXT:    ; return to shader part epilog
3519;
3520; GFX12-LABEL: atomic_add_i64_cube:
3521; GFX12:       ; %bb.0: ; %main_body
3522; GFX12-NEXT:    s_mov_b32 s0, s2
3523; GFX12-NEXT:    s_mov_b32 s1, s3
3524; GFX12-NEXT:    s_mov_b32 s2, s4
3525; GFX12-NEXT:    s_mov_b32 s3, s5
3526; GFX12-NEXT:    s_mov_b32 s4, s6
3527; GFX12-NEXT:    s_mov_b32 s5, s7
3528; GFX12-NEXT:    s_mov_b32 s6, s8
3529; GFX12-NEXT:    s_mov_b32 s7, s9
3530; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE th:TH_ATOMIC_RETURN
3531; GFX12-NEXT:    s_wait_loadcnt 0x0
3532; GFX12-NEXT:    ; return to shader part epilog
3533main_body:
3534  %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
3535  %out = bitcast i64 %v to <2 x float>
3536  ret <2 x float> %out
3537}
3538
; 64-bit image atomic add on a 1D array image, returning the old value.
; Inputs are i64 %data and the i32 operands %s, %slice; the i64 result is
; bitcast to <2 x float> and returned.
; Expected instruction: data in v[0:1], address operands in v[2:3], dmask:0x3.
; The tahiti/fiji/gfx900/gfx90a runs expect the da modifier, while the
; gfx10-and-later runs expect dim:SQ_RSRC_IMG_1D_ARRAY instead.
3539define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %slice) {
3540; GFX6-LABEL: atomic_add_i64_1darray:
3541; GFX6:       ; %bb.0: ; %main_body
3542; GFX6-NEXT:    s_mov_b32 s0, s2
3543; GFX6-NEXT:    s_mov_b32 s1, s3
3544; GFX6-NEXT:    s_mov_b32 s2, s4
3545; GFX6-NEXT:    s_mov_b32 s3, s5
3546; GFX6-NEXT:    s_mov_b32 s4, s6
3547; GFX6-NEXT:    s_mov_b32 s5, s7
3548; GFX6-NEXT:    s_mov_b32 s6, s8
3549; GFX6-NEXT:    s_mov_b32 s7, s9
3550; GFX6-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3551; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
3552; GFX6-NEXT:    ; return to shader part epilog
3553;
3554; GFX8-LABEL: atomic_add_i64_1darray:
3555; GFX8:       ; %bb.0: ; %main_body
3556; GFX8-NEXT:    s_mov_b32 s0, s2
3557; GFX8-NEXT:    s_mov_b32 s1, s3
3558; GFX8-NEXT:    s_mov_b32 s2, s4
3559; GFX8-NEXT:    s_mov_b32 s3, s5
3560; GFX8-NEXT:    s_mov_b32 s4, s6
3561; GFX8-NEXT:    s_mov_b32 s5, s7
3562; GFX8-NEXT:    s_mov_b32 s6, s8
3563; GFX8-NEXT:    s_mov_b32 s7, s9
3564; GFX8-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3565; GFX8-NEXT:    s_waitcnt vmcnt(0)
3566; GFX8-NEXT:    ; return to shader part epilog
3567;
3568; GFX900-LABEL: atomic_add_i64_1darray:
3569; GFX900:       ; %bb.0: ; %main_body
3570; GFX900-NEXT:    s_mov_b32 s0, s2
3571; GFX900-NEXT:    s_mov_b32 s1, s3
3572; GFX900-NEXT:    s_mov_b32 s2, s4
3573; GFX900-NEXT:    s_mov_b32 s3, s5
3574; GFX900-NEXT:    s_mov_b32 s4, s6
3575; GFX900-NEXT:    s_mov_b32 s5, s7
3576; GFX900-NEXT:    s_mov_b32 s6, s8
3577; GFX900-NEXT:    s_mov_b32 s7, s9
3578; GFX900-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3579; GFX900-NEXT:    s_waitcnt vmcnt(0)
3580; GFX900-NEXT:    ; return to shader part epilog
3581;
3582; GFX90A-LABEL: atomic_add_i64_1darray:
3583; GFX90A:       ; %bb.0: ; %main_body
3584; GFX90A-NEXT:    s_mov_b32 s0, s2
3585; GFX90A-NEXT:    s_mov_b32 s1, s3
3586; GFX90A-NEXT:    s_mov_b32 s2, s4
3587; GFX90A-NEXT:    s_mov_b32 s3, s5
3588; GFX90A-NEXT:    s_mov_b32 s4, s6
3589; GFX90A-NEXT:    s_mov_b32 s5, s7
3590; GFX90A-NEXT:    s_mov_b32 s6, s8
3591; GFX90A-NEXT:    s_mov_b32 s7, s9
3592; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
3593; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3594; GFX90A-NEXT:    ; return to shader part epilog
3595;
3596; GFX10PLUS-LABEL: atomic_add_i64_1darray:
3597; GFX10PLUS:       ; %bb.0: ; %main_body
3598; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3599; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3600; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3601; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3602; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3603; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3604; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3605; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3606; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc
3607; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
3608; GFX10PLUS-NEXT:    ; return to shader part epilog
3609;
3610; GFX12-LABEL: atomic_add_i64_1darray:
3611; GFX12:       ; %bb.0: ; %main_body
3612; GFX12-NEXT:    s_mov_b32 s0, s2
3613; GFX12-NEXT:    s_mov_b32 s1, s3
3614; GFX12-NEXT:    s_mov_b32 s2, s4
3615; GFX12-NEXT:    s_mov_b32 s3, s5
3616; GFX12-NEXT:    s_mov_b32 s4, s6
3617; GFX12-NEXT:    s_mov_b32 s5, s7
3618; GFX12-NEXT:    s_mov_b32 s6, s8
3619; GFX12-NEXT:    s_mov_b32 s7, s9
3620; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY th:TH_ATOMIC_RETURN
3621; GFX12-NEXT:    s_wait_loadcnt 0x0
3622; GFX12-NEXT:    ; return to shader part epilog
3623main_body:
3624  %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
3625  %out = bitcast i64 %v to <2 x float>
3626  ret <2 x float> %out
3627}
3628
; 64-bit image atomic add on a 2D array image, returning the old value.
; Inputs are i64 %data and the i32 operands %s, %t, %slice; the i64 result is
; bitcast to <2 x float> and returned.
; Expected instruction: data in v[0:1], address operands in v[2:4], dmask:0x3.
; The tahiti/fiji/gfx900/gfx90a runs expect the da modifier, while the
; gfx10-and-later runs expect dim:SQ_RSRC_IMG_2D_ARRAY instead.
3629define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice) {
3630; GFX6-LABEL: atomic_add_i64_2darray:
3631; GFX6:       ; %bb.0: ; %main_body
3632; GFX6-NEXT:    s_mov_b32 s0, s2
3633; GFX6-NEXT:    s_mov_b32 s1, s3
3634; GFX6-NEXT:    s_mov_b32 s2, s4
3635; GFX6-NEXT:    s_mov_b32 s3, s5
3636; GFX6-NEXT:    s_mov_b32 s4, s6
3637; GFX6-NEXT:    s_mov_b32 s5, s7
3638; GFX6-NEXT:    s_mov_b32 s6, s8
3639; GFX6-NEXT:    s_mov_b32 s7, s9
3640; GFX6-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3641; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
3642; GFX6-NEXT:    ; return to shader part epilog
3643;
3644; GFX8-LABEL: atomic_add_i64_2darray:
3645; GFX8:       ; %bb.0: ; %main_body
3646; GFX8-NEXT:    s_mov_b32 s0, s2
3647; GFX8-NEXT:    s_mov_b32 s1, s3
3648; GFX8-NEXT:    s_mov_b32 s2, s4
3649; GFX8-NEXT:    s_mov_b32 s3, s5
3650; GFX8-NEXT:    s_mov_b32 s4, s6
3651; GFX8-NEXT:    s_mov_b32 s5, s7
3652; GFX8-NEXT:    s_mov_b32 s6, s8
3653; GFX8-NEXT:    s_mov_b32 s7, s9
3654; GFX8-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3655; GFX8-NEXT:    s_waitcnt vmcnt(0)
3656; GFX8-NEXT:    ; return to shader part epilog
3657;
3658; GFX900-LABEL: atomic_add_i64_2darray:
3659; GFX900:       ; %bb.0: ; %main_body
3660; GFX900-NEXT:    s_mov_b32 s0, s2
3661; GFX900-NEXT:    s_mov_b32 s1, s3
3662; GFX900-NEXT:    s_mov_b32 s2, s4
3663; GFX900-NEXT:    s_mov_b32 s3, s5
3664; GFX900-NEXT:    s_mov_b32 s4, s6
3665; GFX900-NEXT:    s_mov_b32 s5, s7
3666; GFX900-NEXT:    s_mov_b32 s6, s8
3667; GFX900-NEXT:    s_mov_b32 s7, s9
3668; GFX900-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3669; GFX900-NEXT:    s_waitcnt vmcnt(0)
3670; GFX900-NEXT:    ; return to shader part epilog
3671;
3672; GFX90A-LABEL: atomic_add_i64_2darray:
3673; GFX90A:       ; %bb.0: ; %main_body
3674; GFX90A-NEXT:    s_mov_b32 s0, s2
3675; GFX90A-NEXT:    s_mov_b32 s1, s3
3676; GFX90A-NEXT:    s_mov_b32 s2, s4
3677; GFX90A-NEXT:    s_mov_b32 s3, s5
3678; GFX90A-NEXT:    s_mov_b32 s4, s6
3679; GFX90A-NEXT:    s_mov_b32 s5, s7
3680; GFX90A-NEXT:    s_mov_b32 s6, s8
3681; GFX90A-NEXT:    s_mov_b32 s7, s9
3682; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
3683; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3684; GFX90A-NEXT:    ; return to shader part epilog
3685;
3686; GFX10PLUS-LABEL: atomic_add_i64_2darray:
3687; GFX10PLUS:       ; %bb.0: ; %main_body
3688; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3689; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3690; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3691; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3692; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3693; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3694; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3695; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3696; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc
3697; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
3698; GFX10PLUS-NEXT:    ; return to shader part epilog
3699;
3700; GFX12-LABEL: atomic_add_i64_2darray:
3701; GFX12:       ; %bb.0: ; %main_body
3702; GFX12-NEXT:    s_mov_b32 s0, s2
3703; GFX12-NEXT:    s_mov_b32 s1, s3
3704; GFX12-NEXT:    s_mov_b32 s2, s4
3705; GFX12-NEXT:    s_mov_b32 s3, s5
3706; GFX12-NEXT:    s_mov_b32 s4, s6
3707; GFX12-NEXT:    s_mov_b32 s5, s7
3708; GFX12-NEXT:    s_mov_b32 s6, s8
3709; GFX12-NEXT:    s_mov_b32 s7, s9
3710; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY th:TH_ATOMIC_RETURN
3711; GFX12-NEXT:    s_wait_loadcnt 0x0
3712; GFX12-NEXT:    ; return to shader part epilog
3713main_body:
3714  %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
3715  %out = bitcast i64 %v to <2 x float>
3716  ret <2 x float> %out
3717}
3718
; 64-bit image atomic add on a 2D multisampled image, returning the old value.
; Inputs are i64 %data and the i32 operands %s, %t, %fragid; the i64 result is
; bitcast to <2 x float> and returned.
; Expected instruction: data in v[0:1], address operands in v[2:4], dmask:0x3,
; and dim:SQ_RSRC_IMG_2D_MSAA on the gfx10-and-later runs. No da modifier is
; expected on the older targets for this dimension.
3719define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %fragid) {
3720; GFX6-LABEL: atomic_add_i64_2dmsaa:
3721; GFX6:       ; %bb.0: ; %main_body
3722; GFX6-NEXT:    s_mov_b32 s0, s2
3723; GFX6-NEXT:    s_mov_b32 s1, s3
3724; GFX6-NEXT:    s_mov_b32 s2, s4
3725; GFX6-NEXT:    s_mov_b32 s3, s5
3726; GFX6-NEXT:    s_mov_b32 s4, s6
3727; GFX6-NEXT:    s_mov_b32 s5, s7
3728; GFX6-NEXT:    s_mov_b32 s6, s8
3729; GFX6-NEXT:    s_mov_b32 s7, s9
3730; GFX6-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3731; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
3732; GFX6-NEXT:    ; return to shader part epilog
3733;
3734; GFX8-LABEL: atomic_add_i64_2dmsaa:
3735; GFX8:       ; %bb.0: ; %main_body
3736; GFX8-NEXT:    s_mov_b32 s0, s2
3737; GFX8-NEXT:    s_mov_b32 s1, s3
3738; GFX8-NEXT:    s_mov_b32 s2, s4
3739; GFX8-NEXT:    s_mov_b32 s3, s5
3740; GFX8-NEXT:    s_mov_b32 s4, s6
3741; GFX8-NEXT:    s_mov_b32 s5, s7
3742; GFX8-NEXT:    s_mov_b32 s6, s8
3743; GFX8-NEXT:    s_mov_b32 s7, s9
3744; GFX8-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3745; GFX8-NEXT:    s_waitcnt vmcnt(0)
3746; GFX8-NEXT:    ; return to shader part epilog
3747;
3748; GFX900-LABEL: atomic_add_i64_2dmsaa:
3749; GFX900:       ; %bb.0: ; %main_body
3750; GFX900-NEXT:    s_mov_b32 s0, s2
3751; GFX900-NEXT:    s_mov_b32 s1, s3
3752; GFX900-NEXT:    s_mov_b32 s2, s4
3753; GFX900-NEXT:    s_mov_b32 s3, s5
3754; GFX900-NEXT:    s_mov_b32 s4, s6
3755; GFX900-NEXT:    s_mov_b32 s5, s7
3756; GFX900-NEXT:    s_mov_b32 s6, s8
3757; GFX900-NEXT:    s_mov_b32 s7, s9
3758; GFX900-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3759; GFX900-NEXT:    s_waitcnt vmcnt(0)
3760; GFX900-NEXT:    ; return to shader part epilog
3761;
3762; GFX90A-LABEL: atomic_add_i64_2dmsaa:
3763; GFX90A:       ; %bb.0: ; %main_body
3764; GFX90A-NEXT:    s_mov_b32 s0, s2
3765; GFX90A-NEXT:    s_mov_b32 s1, s3
3766; GFX90A-NEXT:    s_mov_b32 s2, s4
3767; GFX90A-NEXT:    s_mov_b32 s3, s5
3768; GFX90A-NEXT:    s_mov_b32 s4, s6
3769; GFX90A-NEXT:    s_mov_b32 s5, s7
3770; GFX90A-NEXT:    s_mov_b32 s6, s8
3771; GFX90A-NEXT:    s_mov_b32 s7, s9
3772; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
3773; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3774; GFX90A-NEXT:    ; return to shader part epilog
3775;
3776; GFX10PLUS-LABEL: atomic_add_i64_2dmsaa:
3777; GFX10PLUS:       ; %bb.0: ; %main_body
3778; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3779; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3780; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3781; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3782; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3783; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3784; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3785; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3786; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc
3787; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
3788; GFX10PLUS-NEXT:    ; return to shader part epilog
3789;
3790; GFX12-LABEL: atomic_add_i64_2dmsaa:
3791; GFX12:       ; %bb.0: ; %main_body
3792; GFX12-NEXT:    s_mov_b32 s0, s2
3793; GFX12-NEXT:    s_mov_b32 s1, s3
3794; GFX12-NEXT:    s_mov_b32 s2, s4
3795; GFX12-NEXT:    s_mov_b32 s3, s5
3796; GFX12-NEXT:    s_mov_b32 s4, s6
3797; GFX12-NEXT:    s_mov_b32 s5, s7
3798; GFX12-NEXT:    s_mov_b32 s6, s8
3799; GFX12-NEXT:    s_mov_b32 s7, s9
3800; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA th:TH_ATOMIC_RETURN
3801; GFX12-NEXT:    s_wait_loadcnt 0x0
3802; GFX12-NEXT:    ; return to shader part epilog
3803main_body:
3804  %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
3805  %out = bitcast i64 %v to <2 x float>
3806  ret <2 x float> %out
3807}
3808
; Codegen check for a 64-bit image atomic add addressed with four i32
; coordinates (%s, %t, %slice, %fragid) on the 2darraymsaa dimension.
; The intrinsic is called with the i64 %data operand, the coordinates, the
; <8 x i32> inreg %rsrc value and two zero immediates; its i64 result is
; bitcast to <2 x float> and returned from the shader.
; All run configurations expect %rsrc to be copied from s2..s9 into s[0:7],
; then a single atomic with dmask:0x3 that uses v[0:1] for data and v2..v5
; for the address, and a wait on that result before the epilog.
define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: atomic_add_i64_2darraymsaa:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i64_2darraymsaa:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i64_2darraymsaa:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i64_2darraymsaa:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i64_2darraymsaa:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i64_2darraymsaa:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3, v4, v5], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ; The function returns <2 x float>, so the 64-bit result is reinterpreted.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}
3898
; Codegen check for a 64-bit 1D image atomic add whose final immediate
; operand is 2 (the other immediate stays 0). NOTE(review): per the AMDGPU
; image intrinsic definitions that final operand is the cache policy; confirm
; against IntrinsicsAMDGPU.td.
; With this operand the expected instruction carries `glc slc` on the
; pre-GFX12 prefixes and th:TH_ATOMIC_NT_RETURN on GFX12. The i64 result is
; bitcast to <2 x float> and returned from the shader.
define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_add_i64_1d_slc:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i64_1d_slc:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i64_1d_slc:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i64_1d_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i64_1d_slc:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i64_1d_slc:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ; The function returns <2 x float>, so the 64-bit result is reinterpreted.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}
3988
; Image atomic intrinsic declarations, i32 data variants.
; Operand order: data value (cmpswap takes two), one i32 per coordinate,
; the <8 x i32> resource, then two i32 immediates.
declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

; Same set of intrinsics with i64 data and return types.
declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

; Attribute group attached to every declaration above.
attributes #0 = { nounwind }
4032