; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Origin: llvm-project/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll (revision c2c650f62e15ca2444e1a938fdf869c84535ef16)
;
; Checks the instructions llc emits for fixed-size llvm.memcpy calls.
; Both llc invocations above (tahiti and tonga) are verified against the
; same two check prefixes.

; memcpy intrinsic declarations used by the kernels in this file.
; Operands are (dest, src, length, isvolatile).
declare void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) nocapture, ptr addrspace(3) nocapture, i32, i1) nounwind
declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture, i64, i1) nounwind
; NOTE(review): the name suffix says p2 but the source operand is declared as
; addrspace(4). The spelling is kept because the calls in this file use it;
; confirm whether it should read p1.p4.
declare void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i64, i1) nounwind


; 32-byte memcpy between two addrspace(3) pointers with no alignment
; attribute on either operand. The checks expect 32 ds_read_u8 and
; 32 ds_write_b8 instructions; they are order-independent, so reads and
; writes may be interleaved in the output.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind {
  call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) %out, ptr addrspace(3) %in, i32 32, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(3) pointers, both marked align 2.
; The checks expect 16 ds_read_u16 and 16 ds_write_b16 instructions, in
; any relative order.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16

; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16

; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16

; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16

; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind {
  call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 2 %out, ptr addrspace(3) align 2 %in, i32 32, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(3) pointers, both marked align 4.
; The checks are ordered: four ds_read2_b32 followed by four
; ds_write2_b32.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
; SI: ds_read2_b32
; SI: ds_read2_b32
; SI: ds_read2_b32
; SI: ds_read2_b32

; SI: ds_write2_b32
; SI: ds_write2_b32
; SI: ds_write2_b32
; SI: ds_write2_b32

; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind {
  call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 4 %out, ptr addrspace(3) align 4 %in, i32 32, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(3) pointers, both marked align 8.
; The checks are ordered: two ds_read2_b64 followed by two ds_write2_b64,
; i.e. 64-bit DS operations are expected.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:

; SI: ds_read2_b64
; SI: ds_read2_b64

; SI: ds_write2_b64
; SI: ds_write2_b64

; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(ptr addrspace(3) noalias %out, ptr addrspace(3) noalias %in) nounwind {
  call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) align 8 %out, ptr addrspace(3) align 8 %in, i32 32, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(1) pointers with no alignment
; attribute on either operand. The checks expect 32 buffer_load_ubyte and
; 32 buffer_store_byte instructions, in any relative order.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
  call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 32, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(1) pointers, both marked align 2.
; The checks expect 16 buffer_load_ushort and 16 buffer_store_short
; instructions, in any relative order.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort

; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short

; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
  call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 2 %out, ptr addrspace(1) align 2 %in, i64 32, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(1) pointers, both marked align 4.
; The checks are ordered: two buffer_load_dwordx4 followed by two
; buffer_store_dwordx4.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
  call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %out, ptr addrspace(1) align 4 %in, i64 32, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(1) pointers, both marked align 8.
; Same expectation as the align 4 global case: two buffer_load_dwordx4
; followed by two buffer_store_dwordx4, in that order.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
  call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 8 %out, ptr addrspace(1) align 8 %in, i64 32, i1 false) nounwind
  ret void
}

; 32-byte memcpy between two addrspace(1) pointers, both marked align 16.
; The checks are ordered: two buffer_load_dwordx4 followed by two
; buffer_store_dwordx4.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
  call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 16 %out, ptr addrspace(1) align 16 %in, i64 32, i1 false) nounwind
  ret void
}

; Test shouldConvertConstantLoadToIntImm
; Two 16-byte constant strings in addrspace(4) with identical contents,
; differing only in their declared alignment (4 versus 1). They serve as
; memcpy sources for the constant-string tests.
@hello.align4 = private unnamed_addr addrspace(4) constant [16 x i8] c"constant string\00", align 4
@hello.align1 = private unnamed_addr addrspace(4) constant [16 x i8] c"constant string\00", align 1

; memcpy from the align 4 constant string into an addrspace(1) pointer.
; The checks expect the source address to be formed PC-relatively
; (s_getpc_b64 plus an add of hello.align4@rel32@lo+4 and an add-with-carry),
; then scalar loads and two buffer_store_dwordx4 in any relative order.
; NOTE(review): the copy length is 32 bytes while @hello.align4 is declared
; as [16 x i8]; confirm whether reading past the global is intentional.
; FUNC-LABEL: {{^}}test_memcpy_const_string_align4:
; SI: s_getpc_b64
; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, hello.align4@rel32@lo+4
; SI: s_addc_u32
; SI-DAG: s_load_dwordx8
; SI-DAG: s_load_dwordx2
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
define amdgpu_kernel void @test_memcpy_const_string_align4(ptr addrspace(1) noalias %out) nounwind {
  call void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) align 4 %out, ptr addrspace(4) align 4 @hello.align4, i64 32, i1 false)
  ret void
}

; memcpy from the align 1 constant string into an addrspace(1) pointer with
; no alignment attribute. The checks require that no buffer_load appears
; before the first v_mov_b32 of a hexadecimal immediate, followed by 16
; buffer_store_byte instructions in order.
; NOTE(review): the copy length is 32 bytes while @hello.align1 is declared
; as [16 x i8], and only 16 byte stores are checked; confirm whether the
; length and the number of checks are intentional.
; FUNC-LABEL: {{^}}test_memcpy_const_string_align1:
; SI-NOT: buffer_load
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
define amdgpu_kernel void @test_memcpy_const_string_align1(ptr addrspace(1) noalias %out) nounwind {
  call void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) %out, ptr addrspace(4) @hello.align1, i64 32, i1 false)
  ret void
}
