xref: /llvm-project/llvm/test/CodeGen/AMDGPU/memcpy-param-combinations.ll (revision 173c68239d1d11f4e36c8af07a28310da67568a7)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 %s -o - | FileCheck %s
4
5; Testing codegen for memcpy with vector operands for all combinations of the following parameters:
6;     destination address space: 0, 1, 3, 5
7;     source address space: 0, 1, 3, 4, 5
8;     alignment: 1, 2, 8, 16
9;     sizes: 16, 31, 32
10
11
; Copy 16 bytes from addrspace(0) to addrspace(0); both pointers align 1, isvolatile = false.
; The assertions expect a single flat_load_dwordx4 / flat_store_dwordx4 pair.
12define void @memcpy_p0_p0_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
13; CHECK-LABEL: memcpy_p0_p0_sz16_align_1_1:
14; CHECK:       ; %bb.0: ; %entry
15; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
17; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
18; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
19; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
20; CHECK-NEXT:    s_setpc_b64 s[30:31]
21entry:
22  tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
23  ret void
24}
25
; Copy 31 bytes from addrspace(0) to addrspace(0); both pointers align 1, isvolatile = false.
; The assertions expect four clustered flat loads (ubyte @30, ushort @28, dwordx3 @16, dwordx4 @0)
; followed by the matching flat stores, each gated by its own s_waitcnt.
26define void @memcpy_p0_p0_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
27; CHECK-LABEL: memcpy_p0_p0_sz31_align_1_1:
28; CHECK:       ; %bb.0: ; %entry
29; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30; CHECK-NEXT:    s_clause 0x3
31; CHECK-NEXT:    flat_load_ubyte v9, v[2:3] offset:30
32; CHECK-NEXT:    flat_load_ushort v10, v[2:3] offset:28
33; CHECK-NEXT:    flat_load_dwordx3 v[6:8], v[2:3] offset:16
34; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
35; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
36; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
37; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(3)
38; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
39; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(3)
40; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
41; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(3)
42; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
43; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
44; CHECK-NEXT:    s_setpc_b64 s[30:31]
45entry:
46  tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
47  ret void
48}
49
; Copy 32 bytes from addrspace(0) to addrspace(0); both pointers align 1, isvolatile = false.
; The assertions expect two flat_load_dwordx4 (offsets 16 and 0) followed by two flat_store_dwordx4.
50define void @memcpy_p0_p0_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
51; CHECK-LABEL: memcpy_p0_p0_sz32_align_1_1:
52; CHECK:       ; %bb.0: ; %entry
53; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54; CHECK-NEXT:    s_clause 0x1
55; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
56; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
57; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
58; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
59; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
60; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
61; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
62; CHECK-NEXT:    s_setpc_b64 s[30:31]
63entry:
64  tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
65  ret void
66}
67
; Copy 16 bytes from addrspace(0) to addrspace(0); both pointers align 2, isvolatile = false.
; The assertions expect a single flat_load_dwordx4 / flat_store_dwordx4 pair.
68define void @memcpy_p0_p0_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
69; CHECK-LABEL: memcpy_p0_p0_sz16_align_2_2:
70; CHECK:       ; %bb.0: ; %entry
71; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
73; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
74; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
75; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
76; CHECK-NEXT:    s_setpc_b64 s[30:31]
77entry:
78  tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
79  ret void
80}
81
; Copy 31 bytes from addrspace(0) to addrspace(0); both pointers align 2, isvolatile = false.
; The assertions expect four clustered flat loads (ubyte @30, ushort @28, dwordx3 @16, dwordx4 @0)
; followed by the matching flat stores, each gated by its own s_waitcnt.
82define void @memcpy_p0_p0_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
83; CHECK-LABEL: memcpy_p0_p0_sz31_align_2_2:
84; CHECK:       ; %bb.0: ; %entry
85; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86; CHECK-NEXT:    s_clause 0x3
87; CHECK-NEXT:    flat_load_ubyte v9, v[2:3] offset:30
88; CHECK-NEXT:    flat_load_ushort v10, v[2:3] offset:28
89; CHECK-NEXT:    flat_load_dwordx3 v[6:8], v[2:3] offset:16
90; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
91; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
92; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
93; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(3)
94; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
95; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(3)
96; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
97; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(3)
98; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
99; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
100; CHECK-NEXT:    s_setpc_b64 s[30:31]
101entry:
102  tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
103  ret void
104}
105
; Copy 32 bytes from addrspace(0) to addrspace(0); both pointers align 2, isvolatile = false.
; The assertions expect two flat_load_dwordx4 (offsets 16 and 0) followed by two flat_store_dwordx4.
106define void @memcpy_p0_p0_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
107; CHECK-LABEL: memcpy_p0_p0_sz32_align_2_2:
108; CHECK:       ; %bb.0: ; %entry
109; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110; CHECK-NEXT:    s_clause 0x1
111; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
112; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
113; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
114; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
115; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
116; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
117; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
118; CHECK-NEXT:    s_setpc_b64 s[30:31]
119entry:
120  tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
121  ret void
122}
123
; Copy 16 bytes from addrspace(0) to addrspace(0); both pointers align 8, isvolatile = false.
; The assertions expect a single flat_load_dwordx4 / flat_store_dwordx4 pair.
124define void @memcpy_p0_p0_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
125; CHECK-LABEL: memcpy_p0_p0_sz16_align_8_8:
126; CHECK:       ; %bb.0: ; %entry
127; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
129; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
130; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
131; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
132; CHECK-NEXT:    s_setpc_b64 s[30:31]
133entry:
134  tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
135  ret void
136}
137
; Copy 31 bytes from addrspace(0) to addrspace(0); both pointers align 8, isvolatile = false.
; The assertions expect two overlapping 16-byte flat accesses at offsets 15 and 0
; (bytes 15..30 and 0..15) rather than a dwordx3/short/byte tail.
138define void @memcpy_p0_p0_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
139; CHECK-LABEL: memcpy_p0_p0_sz31_align_8_8:
140; CHECK:       ; %bb.0: ; %entry
141; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142; CHECK-NEXT:    s_clause 0x1
143; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:15
144; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
145; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
146; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:15
147; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
148; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
149; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
150; CHECK-NEXT:    s_setpc_b64 s[30:31]
151entry:
152  tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
153  ret void
154}
155
; Copy 32 bytes from addrspace(0) to addrspace(0); both pointers align 8, isvolatile = false.
; The assertions expect two flat_load_dwordx4 (offsets 16 and 0) followed by two flat_store_dwordx4.
156define void @memcpy_p0_p0_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
157; CHECK-LABEL: memcpy_p0_p0_sz32_align_8_8:
158; CHECK:       ; %bb.0: ; %entry
159; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160; CHECK-NEXT:    s_clause 0x1
161; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
162; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
163; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
164; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
165; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
166; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
167; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
168; CHECK-NEXT:    s_setpc_b64 s[30:31]
169entry:
170  tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
171  ret void
172}
173
; Copy 16 bytes from addrspace(0) to addrspace(0); both pointers align 16, isvolatile = false.
; The assertions expect a single flat_load_dwordx4 / flat_store_dwordx4 pair.
174define void @memcpy_p0_p0_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
175; CHECK-LABEL: memcpy_p0_p0_sz16_align_16_16:
176; CHECK:       ; %bb.0: ; %entry
177; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
179; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
180; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
181; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
182; CHECK-NEXT:    s_setpc_b64 s[30:31]
183entry:
184  tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
185  ret void
186}
187
; Copy 31 bytes from addrspace(0) to addrspace(0); both pointers align 16, isvolatile = false.
; The assertions expect two overlapping 16-byte flat accesses at offsets 15 and 0
; (bytes 15..30 and 0..15) rather than a dwordx3/short/byte tail.
188define void @memcpy_p0_p0_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
189; CHECK-LABEL: memcpy_p0_p0_sz31_align_16_16:
190; CHECK:       ; %bb.0: ; %entry
191; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
192; CHECK-NEXT:    s_clause 0x1
193; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:15
194; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
195; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
196; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:15
197; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
198; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
199; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
200; CHECK-NEXT:    s_setpc_b64 s[30:31]
201entry:
202  tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
203  ret void
204}
205
; Copy 32 bytes from addrspace(0) to addrspace(0); both pointers align 16, isvolatile = false.
; The assertions expect two flat_load_dwordx4 (offsets 16 and 0) followed by two flat_store_dwordx4.
206define void @memcpy_p0_p0_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
207; CHECK-LABEL: memcpy_p0_p0_sz32_align_16_16:
208; CHECK:       ; %bb.0: ; %entry
209; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210; CHECK-NEXT:    s_clause 0x1
211; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
212; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
213; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
214; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
215; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
216; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
217; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
218; CHECK-NEXT:    s_setpc_b64 s[30:31]
219entry:
220  tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
221  ret void
222}
223
; Copy 16 bytes from addrspace(1) to addrspace(0); both pointers align 1, isvolatile = false.
; The assertions expect one global_load_dwordx4 feeding one flat_store_dwordx4.
224define void @memcpy_p0_p1_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
225; CHECK-LABEL: memcpy_p0_p1_sz16_align_1_1:
226; CHECK:       ; %bb.0: ; %entry
227; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
229; CHECK-NEXT:    s_waitcnt vmcnt(0)
230; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
231; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
232; CHECK-NEXT:    s_setpc_b64 s[30:31]
233entry:
234  tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
235  ret void
236}
237
; Copy 31 bytes from addrspace(1) to addrspace(0); both pointers align 1, isvolatile = false.
; The assertions expect four clustered global loads (ubyte @30, ushort @28, dwordx3 @16, dwordx4 @0)
; followed by the matching flat stores, each gated by a decreasing vmcnt wait.
238define void @memcpy_p0_p1_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
239; CHECK-LABEL: memcpy_p0_p1_sz31_align_1_1:
240; CHECK:       ; %bb.0: ; %entry
241; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242; CHECK-NEXT:    s_clause 0x3
243; CHECK-NEXT:    global_load_ubyte v9, v[2:3], off offset:30
244; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
245; CHECK-NEXT:    global_load_dwordx3 v[6:8], v[2:3], off offset:16
246; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
247; CHECK-NEXT:    s_waitcnt vmcnt(3)
248; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
249; CHECK-NEXT:    s_waitcnt vmcnt(2)
250; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
251; CHECK-NEXT:    s_waitcnt vmcnt(1)
252; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
253; CHECK-NEXT:    s_waitcnt vmcnt(0)
254; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
255; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
256; CHECK-NEXT:    s_setpc_b64 s[30:31]
257entry:
258  tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
259  ret void
260}
261
; Copy 32 bytes from addrspace(1) to addrspace(0); both pointers align 1, isvolatile = false.
; The assertions expect two global_load_dwordx4 (offsets 16 and 0) followed by two flat_store_dwordx4.
262define void @memcpy_p0_p1_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
263; CHECK-LABEL: memcpy_p0_p1_sz32_align_1_1:
264; CHECK:       ; %bb.0: ; %entry
265; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; CHECK-NEXT:    s_clause 0x1
267; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
268; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
269; CHECK-NEXT:    s_waitcnt vmcnt(1)
270; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
271; CHECK-NEXT:    s_waitcnt vmcnt(0)
272; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
273; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
274; CHECK-NEXT:    s_setpc_b64 s[30:31]
275entry:
276  tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
277  ret void
278}
279
; Copy 16 bytes from addrspace(1) to addrspace(0); both pointers align 2, isvolatile = false.
; The assertions expect one global_load_dwordx4 feeding one flat_store_dwordx4.
280define void @memcpy_p0_p1_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
281; CHECK-LABEL: memcpy_p0_p1_sz16_align_2_2:
282; CHECK:       ; %bb.0: ; %entry
283; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
284; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
285; CHECK-NEXT:    s_waitcnt vmcnt(0)
286; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
287; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
288; CHECK-NEXT:    s_setpc_b64 s[30:31]
289entry:
290  tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
291  ret void
292}
293
; Copy 31 bytes from addrspace(1) to addrspace(0); both pointers align 2, isvolatile = false.
; The assertions expect four clustered global loads (ubyte @30, ushort @28, dwordx3 @16, dwordx4 @0)
; followed by the matching flat stores, each gated by a decreasing vmcnt wait.
294define void @memcpy_p0_p1_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
295; CHECK-LABEL: memcpy_p0_p1_sz31_align_2_2:
296; CHECK:       ; %bb.0: ; %entry
297; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
298; CHECK-NEXT:    s_clause 0x3
299; CHECK-NEXT:    global_load_ubyte v9, v[2:3], off offset:30
300; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
301; CHECK-NEXT:    global_load_dwordx3 v[6:8], v[2:3], off offset:16
302; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
303; CHECK-NEXT:    s_waitcnt vmcnt(3)
304; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
305; CHECK-NEXT:    s_waitcnt vmcnt(2)
306; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
307; CHECK-NEXT:    s_waitcnt vmcnt(1)
308; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
309; CHECK-NEXT:    s_waitcnt vmcnt(0)
310; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
311; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
312; CHECK-NEXT:    s_setpc_b64 s[30:31]
313entry:
314  tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
315  ret void
316}
317
; Copy 32 bytes from addrspace(1) to addrspace(0); both pointers align 2, isvolatile = false.
; The assertions expect two global_load_dwordx4 (offsets 16 and 0) followed by two flat_store_dwordx4.
318define void @memcpy_p0_p1_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
319; CHECK-LABEL: memcpy_p0_p1_sz32_align_2_2:
320; CHECK:       ; %bb.0: ; %entry
321; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322; CHECK-NEXT:    s_clause 0x1
323; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
324; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
325; CHECK-NEXT:    s_waitcnt vmcnt(1)
326; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
327; CHECK-NEXT:    s_waitcnt vmcnt(0)
328; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
329; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
330; CHECK-NEXT:    s_setpc_b64 s[30:31]
331entry:
332  tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
333  ret void
334}
335
; Copy 16 bytes from addrspace(1) to addrspace(0); both pointers align 8, isvolatile = false.
; The assertions expect one global_load_dwordx4 feeding one flat_store_dwordx4.
336define void @memcpy_p0_p1_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
337; CHECK-LABEL: memcpy_p0_p1_sz16_align_8_8:
338; CHECK:       ; %bb.0: ; %entry
339; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
340; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
341; CHECK-NEXT:    s_waitcnt vmcnt(0)
342; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
343; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
344; CHECK-NEXT:    s_setpc_b64 s[30:31]
345entry:
346  tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
347  ret void
348}
349
; Copy 31 bytes from addrspace(1) to addrspace(0); both pointers align 8, isvolatile = false.
; The assertions expect two overlapping 16-byte accesses at offsets 15 and 0
; (global loads, flat stores) rather than a dwordx3/short/byte tail.
350define void @memcpy_p0_p1_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
351; CHECK-LABEL: memcpy_p0_p1_sz31_align_8_8:
352; CHECK:       ; %bb.0: ; %entry
353; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354; CHECK-NEXT:    s_clause 0x1
355; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:15
356; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
357; CHECK-NEXT:    s_waitcnt vmcnt(1)
358; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:15
359; CHECK-NEXT:    s_waitcnt vmcnt(0)
360; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
361; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
362; CHECK-NEXT:    s_setpc_b64 s[30:31]
363entry:
364  tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
365  ret void
366}
367
; Copy 32 bytes from addrspace(1) to addrspace(0); both pointers align 8, isvolatile = false.
; The assertions expect two global_load_dwordx4 (offsets 16 and 0) followed by two flat_store_dwordx4.
368define void @memcpy_p0_p1_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
369; CHECK-LABEL: memcpy_p0_p1_sz32_align_8_8:
370; CHECK:       ; %bb.0: ; %entry
371; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372; CHECK-NEXT:    s_clause 0x1
373; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
374; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
375; CHECK-NEXT:    s_waitcnt vmcnt(1)
376; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
377; CHECK-NEXT:    s_waitcnt vmcnt(0)
378; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
379; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
380; CHECK-NEXT:    s_setpc_b64 s[30:31]
381entry:
382  tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
383  ret void
384}
385
; Copy 16 bytes from addrspace(1) to addrspace(0); both pointers align 16, isvolatile = false.
; The assertions expect one global_load_dwordx4 feeding one flat_store_dwordx4.
386define void @memcpy_p0_p1_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
387; CHECK-LABEL: memcpy_p0_p1_sz16_align_16_16:
388; CHECK:       ; %bb.0: ; %entry
389; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
391; CHECK-NEXT:    s_waitcnt vmcnt(0)
392; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
393; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
394; CHECK-NEXT:    s_setpc_b64 s[30:31]
395entry:
396  tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
397  ret void
398}
399
; Copy 31 bytes from addrspace(1) to addrspace(0); both pointers align 16, isvolatile = false.
; The assertions expect two overlapping 16-byte accesses at offsets 15 and 0
; (global loads, flat stores) rather than a dwordx3/short/byte tail.
400define void @memcpy_p0_p1_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
401; CHECK-LABEL: memcpy_p0_p1_sz31_align_16_16:
402; CHECK:       ; %bb.0: ; %entry
403; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404; CHECK-NEXT:    s_clause 0x1
405; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:15
406; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
407; CHECK-NEXT:    s_waitcnt vmcnt(1)
408; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:15
409; CHECK-NEXT:    s_waitcnt vmcnt(0)
410; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
411; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
412; CHECK-NEXT:    s_setpc_b64 s[30:31]
413entry:
414  tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
415  ret void
416}
417
; Copy 32 bytes from addrspace(1) to addrspace(0); both pointers align 16, isvolatile = false.
; The assertions expect two global_load_dwordx4 (offsets 16 and 0) followed by two flat_store_dwordx4.
418define void @memcpy_p0_p1_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
419; CHECK-LABEL: memcpy_p0_p1_sz32_align_16_16:
420; CHECK:       ; %bb.0: ; %entry
421; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422; CHECK-NEXT:    s_clause 0x1
423; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
424; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
425; CHECK-NEXT:    s_waitcnt vmcnt(1)
426; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
427; CHECK-NEXT:    s_waitcnt vmcnt(0)
428; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
429; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
430; CHECK-NEXT:    s_setpc_b64 s[30:31]
431entry:
432  tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
433  ret void
434}
435
; Copy 16 bytes from addrspace(3) to addrspace(0); both pointers align 1, isvolatile = false.
; The assertions expect one ds_read2_b64 feeding one flat_store_dwordx4.
436define void @memcpy_p0_p3_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
437; CHECK-LABEL: memcpy_p0_p3_sz16_align_1_1:
438; CHECK:       ; %bb.0: ; %entry
439; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440; CHECK-NEXT:    ds_read2_b64 v[2:5], v2 offset1:1
441; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
442; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
443; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
444; CHECK-NEXT:    s_setpc_b64 s[30:31]
445entry:
446  tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
447  ret void
448}
449
; Copy 31 bytes from addrspace(3) to addrspace(0); both pointers align 1, isvolatile = false.
; The assertions expect five DS reads (u8 @30, b32 @24, u16 @28, b64 @16, read2_b64 @0)
; feeding four flat stores (byte @30, short @28, dwordx3 @16, dwordx4 @0).
450define void @memcpy_p0_p3_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
451; CHECK-LABEL: memcpy_p0_p3_sz31_align_1_1:
452; CHECK:       ; %bb.0: ; %entry
453; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454; CHECK-NEXT:    ds_read_u8 v9, v2 offset:30
455; CHECK-NEXT:    ds_read_b32 v8, v2 offset:24
456; CHECK-NEXT:    ds_read_u16 v10, v2 offset:28
457; CHECK-NEXT:    ds_read_b64 v[6:7], v2 offset:16
458; CHECK-NEXT:    ds_read2_b64 v[2:5], v2 offset1:1
459; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
460; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
461; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
462; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
463; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
464; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
465; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
466; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
467; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
468; CHECK-NEXT:    s_setpc_b64 s[30:31]
469entry:
470  tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
471  ret void
472}
473
; Copy 32 bytes from addrspace(3) to addrspace(0); both pointers align 1, isvolatile = false.
; The assertions expect two ds_read2_b64 (element offsets 2,3 and 0,1) feeding two flat_store_dwordx4.
474define void @memcpy_p0_p3_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
475; CHECK-LABEL: memcpy_p0_p3_sz32_align_1_1:
476; CHECK:       ; %bb.0: ; %entry
477; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
478; CHECK-NEXT:    ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
479; CHECK-NEXT:    ds_read2_b64 v[7:10], v2 offset1:1
480; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
481; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:16
482; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
483; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
484; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
485; CHECK-NEXT:    s_setpc_b64 s[30:31]
486entry:
487  tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
488  ret void
489}
490
; Copy 16 bytes from addrspace(3) to addrspace(0); both pointers align 2, isvolatile = false.
; The assertions expect one ds_read2_b64 feeding one flat_store_dwordx4.
491define void @memcpy_p0_p3_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
492; CHECK-LABEL: memcpy_p0_p3_sz16_align_2_2:
493; CHECK:       ; %bb.0: ; %entry
494; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
495; CHECK-NEXT:    ds_read2_b64 v[2:5], v2 offset1:1
496; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
497; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
498; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
499; CHECK-NEXT:    s_setpc_b64 s[30:31]
500entry:
501  tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
502  ret void
503}
504
; Copy 31 bytes from addrspace(3) to addrspace(0); both pointers align 2, isvolatile = false.
; The assertions expect five DS reads (u8 @30, b32 @24, u16 @28, b64 @16, read2_b64 @0)
; feeding four flat stores (byte @30, short @28, dwordx3 @16, dwordx4 @0).
505define void @memcpy_p0_p3_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
506; CHECK-LABEL: memcpy_p0_p3_sz31_align_2_2:
507; CHECK:       ; %bb.0: ; %entry
508; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
509; CHECK-NEXT:    ds_read_u8 v9, v2 offset:30
510; CHECK-NEXT:    ds_read_b32 v8, v2 offset:24
511; CHECK-NEXT:    ds_read_u16 v10, v2 offset:28
512; CHECK-NEXT:    ds_read_b64 v[6:7], v2 offset:16
513; CHECK-NEXT:    ds_read2_b64 v[2:5], v2 offset1:1
514; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
515; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
516; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
517; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
518; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
519; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
520; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
521; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
522; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
523; CHECK-NEXT:    s_setpc_b64 s[30:31]
524entry:
525  tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
526  ret void
527}
528
; Copy 32 bytes from addrspace(3) to addrspace(0); both pointers align 2, isvolatile = false.
; The assertions expect two ds_read2_b64 (element offsets 2,3 and 0,1) feeding two flat_store_dwordx4.
529define void @memcpy_p0_p3_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
530; CHECK-LABEL: memcpy_p0_p3_sz32_align_2_2:
531; CHECK:       ; %bb.0: ; %entry
532; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533; CHECK-NEXT:    ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
534; CHECK-NEXT:    ds_read2_b64 v[7:10], v2 offset1:1
535; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
536; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:16
537; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
538; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
539; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
540; CHECK-NEXT:    s_setpc_b64 s[30:31]
541entry:
542  tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
543  ret void
544}
545
; Copy 16 bytes from addrspace(3) to addrspace(0); both pointers align 8, isvolatile = false.
; The assertions expect one ds_read2_b64 feeding one flat_store_dwordx4.
546define void @memcpy_p0_p3_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
547; CHECK-LABEL: memcpy_p0_p3_sz16_align_8_8:
548; CHECK:       ; %bb.0: ; %entry
549; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
550; CHECK-NEXT:    ds_read2_b64 v[2:5], v2 offset1:1
551; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
552; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
553; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
554; CHECK-NEXT:    s_setpc_b64 s[30:31]
555entry:
556  tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
557  ret void
558}
559
; Copy 31 bytes from addrspace(3) to addrspace(0); both pointers align 8, isvolatile = false.
; The assertions expect two overlapping 16-byte pieces: ds_read_b128 at offset 15 and
; ds_read2_b64 at offset 0, stored with flat_store_dwordx4 at offsets 15 and 0.
560define void @memcpy_p0_p3_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
561; CHECK-LABEL: memcpy_p0_p3_sz31_align_8_8:
562; CHECK:       ; %bb.0: ; %entry
563; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
564; CHECK-NEXT:    ds_read_b128 v[3:6], v2 offset:15
565; CHECK-NEXT:    ds_read2_b64 v[7:10], v2 offset1:1
566; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
567; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:15
568; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
569; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
570; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
571; CHECK-NEXT:    s_setpc_b64 s[30:31]
572entry:
573  tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
574  ret void
575}
576
; Copy 32 bytes from addrspace(3) to addrspace(0); both pointers align 8, isvolatile = false.
; The assertions expect two ds_read2_b64 (element offsets 2,3 and 0,1) feeding two flat_store_dwordx4.
577define void @memcpy_p0_p3_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
578; CHECK-LABEL: memcpy_p0_p3_sz32_align_8_8:
579; CHECK:       ; %bb.0: ; %entry
580; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
581; CHECK-NEXT:    ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
582; CHECK-NEXT:    ds_read2_b64 v[7:10], v2 offset1:1
583; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
584; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:16
585; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
586; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
587; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
588; CHECK-NEXT:    s_setpc_b64 s[30:31]
589entry:
590  tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
591  ret void
592}
593
; Copy 16 bytes from addrspace(3) to addrspace(0); both pointers align 16, isvolatile = false.
; The assertions expect one ds_read_b128 feeding one flat_store_dwordx4.
594define void @memcpy_p0_p3_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
595; CHECK-LABEL: memcpy_p0_p3_sz16_align_16_16:
596; CHECK:       ; %bb.0: ; %entry
597; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598; CHECK-NEXT:    ds_read_b128 v[2:5], v2
599; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
600; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
601; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
602; CHECK-NEXT:    s_setpc_b64 s[30:31]
603entry:
604  tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
605  ret void
606}
607
; Copy 31 bytes from addrspace(3) to addrspace(0); both pointers align 16, isvolatile = false.
; The assertions expect two overlapping ds_read_b128 (offsets 15 and 0) feeding
; flat_store_dwordx4 at offsets 15 and 0.
608define void @memcpy_p0_p3_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
609; CHECK-LABEL: memcpy_p0_p3_sz31_align_16_16:
610; CHECK:       ; %bb.0: ; %entry
611; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
612; CHECK-NEXT:    ds_read_b128 v[3:6], v2 offset:15
613; CHECK-NEXT:    ds_read_b128 v[7:10], v2
614; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
615; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:15
616; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
617; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
618; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
619; CHECK-NEXT:    s_setpc_b64 s[30:31]
620entry:
621  tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
622  ret void
623}
624
; Copy 32 bytes from addrspace(3) to addrspace(0); both pointers align 16, isvolatile = false.
; The assertions expect two ds_read_b128 (offsets 16 and 0) feeding two flat_store_dwordx4.
625define void @memcpy_p0_p3_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
626; CHECK-LABEL: memcpy_p0_p3_sz32_align_16_16:
627; CHECK:       ; %bb.0: ; %entry
628; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
629; CHECK-NEXT:    ds_read_b128 v[3:6], v2 offset:16
630; CHECK-NEXT:    ds_read_b128 v[7:10], v2
631; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
632; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:16
633; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
634; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
635; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
636; CHECK-NEXT:    s_setpc_b64 s[30:31]
637entry:
638  tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
639  ret void
640}
641
; Copy 16 bytes from addrspace(4) to addrspace(0); both pointers align 1, isvolatile = false.
; The assertions expect two sequential 8-byte pieces (offsets 0 and 8): each global_load_dwordx2
; is waited on and stored with flat_store_dwordx2 before the next load is issued.
642define void @memcpy_p0_p4_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
643; CHECK-LABEL: memcpy_p0_p4_sz16_align_1_1:
644; CHECK:       ; %bb.0: ; %entry
645; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off
647; CHECK-NEXT:    s_waitcnt vmcnt(0)
648; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5]
649; CHECK-NEXT:    global_load_dwordx2 v[2:3], v[2:3], off offset:8
650; CHECK-NEXT:    s_waitcnt vmcnt(0)
651; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[2:3] offset:8
652; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
653; CHECK-NEXT:    s_setpc_b64 s[30:31]
654entry:
655  tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
656  ret void
657}
658
; 31-byte memcpy from an addrspace(4) source to an addrspace(0) destination,
; both 1-byte aligned. The expected code copies pieces of 8, 8, 8, 4, 2 and 1
; bytes at offsets 0, 8, 16, 24, 28 and 30; every global load is waited on and
; stored with the matching flat store before the next load is issued.
659define void @memcpy_p0_p4_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
660; CHECK-LABEL: memcpy_p0_p4_sz31_align_1_1:
661; CHECK:       ; %bb.0: ; %entry
662; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
663; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off
664; CHECK-NEXT:    s_waitcnt vmcnt(0)
665; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5]
666; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off offset:8
667; CHECK-NEXT:    s_waitcnt vmcnt(0)
668; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5] offset:8
669; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off offset:16
670; CHECK-NEXT:    s_waitcnt vmcnt(0)
671; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5] offset:16
672; CHECK-NEXT:    global_load_dword v4, v[2:3], off offset:24
673; CHECK-NEXT:    s_waitcnt vmcnt(0)
674; CHECK-NEXT:    flat_store_dword v[0:1], v4 offset:24
675; CHECK-NEXT:    global_load_ushort v4, v[2:3], off offset:28
676; CHECK-NEXT:    s_waitcnt vmcnt(0)
677; CHECK-NEXT:    flat_store_short v[0:1], v4 offset:28
678; CHECK-NEXT:    global_load_ubyte v2, v[2:3], off offset:30
679; CHECK-NEXT:    s_waitcnt vmcnt(0)
680; CHECK-NEXT:    flat_store_byte v[0:1], v2 offset:30
681; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
682; CHECK-NEXT:    s_setpc_b64 s[30:31]
683entry:
684  tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
685  ret void
686}
687
; 32-byte memcpy from an addrspace(4) source to an addrspace(0) destination,
; both 1-byte aligned. The expected code copies four 8-byte pieces at offsets
; 0, 8, 16 and 24; each global_load_dwordx2 is waited on and stored with
; flat_store_dwordx2 before the next load is issued.
688define void @memcpy_p0_p4_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
689; CHECK-LABEL: memcpy_p0_p4_sz32_align_1_1:
690; CHECK:       ; %bb.0: ; %entry
691; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
692; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off
693; CHECK-NEXT:    s_waitcnt vmcnt(0)
694; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5]
695; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off offset:8
696; CHECK-NEXT:    s_waitcnt vmcnt(0)
697; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5] offset:8
698; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off offset:16
699; CHECK-NEXT:    s_waitcnt vmcnt(0)
700; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5] offset:16
701; CHECK-NEXT:    global_load_dwordx2 v[2:3], v[2:3], off offset:24
702; CHECK-NEXT:    s_waitcnt vmcnt(0)
703; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[2:3] offset:24
704; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
705; CHECK-NEXT:    s_setpc_b64 s[30:31]
706entry:
707  tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
708  ret void
709}
710
; 16-byte memcpy from an addrspace(4) source to an addrspace(0) destination,
; both 2-byte aligned. The expected code copies two 8-byte pieces (offsets 0
; and 8); each global_load_dwordx2 is waited on and stored with
; flat_store_dwordx2 before the next load is issued.
711define void @memcpy_p0_p4_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
712; CHECK-LABEL: memcpy_p0_p4_sz16_align_2_2:
713; CHECK:       ; %bb.0: ; %entry
714; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
715; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off
716; CHECK-NEXT:    s_waitcnt vmcnt(0)
717; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5]
718; CHECK-NEXT:    global_load_dwordx2 v[2:3], v[2:3], off offset:8
719; CHECK-NEXT:    s_waitcnt vmcnt(0)
720; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[2:3] offset:8
721; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
722; CHECK-NEXT:    s_setpc_b64 s[30:31]
723entry:
724  tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
725  ret void
726}
727
; 31-byte memcpy from an addrspace(4) source to an addrspace(0) destination,
; both 2-byte aligned. The expected code copies pieces of 8, 8, 8, 4, 2 and 1
; bytes at offsets 0, 8, 16, 24, 28 and 30; every global load is waited on and
; stored with the matching flat store before the next load is issued.
728define void @memcpy_p0_p4_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
729; CHECK-LABEL: memcpy_p0_p4_sz31_align_2_2:
730; CHECK:       ; %bb.0: ; %entry
731; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
732; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off
733; CHECK-NEXT:    s_waitcnt vmcnt(0)
734; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5]
735; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off offset:8
736; CHECK-NEXT:    s_waitcnt vmcnt(0)
737; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5] offset:8
738; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off offset:16
739; CHECK-NEXT:    s_waitcnt vmcnt(0)
740; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5] offset:16
741; CHECK-NEXT:    global_load_dword v4, v[2:3], off offset:24
742; CHECK-NEXT:    s_waitcnt vmcnt(0)
743; CHECK-NEXT:    flat_store_dword v[0:1], v4 offset:24
744; CHECK-NEXT:    global_load_ushort v4, v[2:3], off offset:28
745; CHECK-NEXT:    s_waitcnt vmcnt(0)
746; CHECK-NEXT:    flat_store_short v[0:1], v4 offset:28
747; CHECK-NEXT:    global_load_ubyte v2, v[2:3], off offset:30
748; CHECK-NEXT:    s_waitcnt vmcnt(0)
749; CHECK-NEXT:    flat_store_byte v[0:1], v2 offset:30
750; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
751; CHECK-NEXT:    s_setpc_b64 s[30:31]
752entry:
753  tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
754  ret void
755}
756
; 32-byte memcpy from an addrspace(4) source to an addrspace(0) destination,
; both 2-byte aligned. The expected code copies four 8-byte pieces at offsets
; 0, 8, 16 and 24; each global_load_dwordx2 is waited on and stored with
; flat_store_dwordx2 before the next load is issued.
757define void @memcpy_p0_p4_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
758; CHECK-LABEL: memcpy_p0_p4_sz32_align_2_2:
759; CHECK:       ; %bb.0: ; %entry
760; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
761; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off
762; CHECK-NEXT:    s_waitcnt vmcnt(0)
763; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5]
764; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off offset:8
765; CHECK-NEXT:    s_waitcnt vmcnt(0)
766; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5] offset:8
767; CHECK-NEXT:    global_load_dwordx2 v[4:5], v[2:3], off offset:16
768; CHECK-NEXT:    s_waitcnt vmcnt(0)
769; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[4:5] offset:16
770; CHECK-NEXT:    global_load_dwordx2 v[2:3], v[2:3], off offset:24
771; CHECK-NEXT:    s_waitcnt vmcnt(0)
772; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[2:3] offset:24
773; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
774; CHECK-NEXT:    s_setpc_b64 s[30:31]
775entry:
776  tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
777  ret void
778}
779
; 16-byte memcpy from an addrspace(4) source to an addrspace(0) destination,
; both 8-byte aligned. The expected code is a single global_load_dwordx4
; followed by a single flat_store_dwordx4.
780define void @memcpy_p0_p4_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
781; CHECK-LABEL: memcpy_p0_p4_sz16_align_8_8:
782; CHECK:       ; %bb.0: ; %entry
783; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
784; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
785; CHECK-NEXT:    s_waitcnt vmcnt(0)
786; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
787; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
788; CHECK-NEXT:    s_setpc_b64 s[30:31]
789entry:
790  tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
791  ret void
792}
793
; 31-byte memcpy from an addrspace(4) source to an addrspace(0) destination,
; both 8-byte aligned. The expected code copies two 16-byte pieces with
; global_load_dwordx4 / flat_store_dwordx4 at offsets 0 and 15, which overlap
; in byte 15; the first piece is loaded and stored before the second load is
; issued.
794define void @memcpy_p0_p4_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
795; CHECK-LABEL: memcpy_p0_p4_sz31_align_8_8:
796; CHECK:       ; %bb.0: ; %entry
797; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
798; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
799; CHECK-NEXT:    s_waitcnt vmcnt(0)
800; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
801; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off offset:15
802; CHECK-NEXT:    s_waitcnt vmcnt(0)
803; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5] offset:15
804; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
805; CHECK-NEXT:    s_setpc_b64 s[30:31]
806entry:
807  tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
808  ret void
809}
810
; 32-byte memcpy from an addrspace(4) source to an addrspace(0) destination,
; both 8-byte aligned. The expected code copies two 16-byte pieces with
; global_load_dwordx4 / flat_store_dwordx4 at offsets 0 and 16; the first piece
; is loaded and stored before the second load is issued.
811define void @memcpy_p0_p4_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
812; CHECK-LABEL: memcpy_p0_p4_sz32_align_8_8:
813; CHECK:       ; %bb.0: ; %entry
814; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
815; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
816; CHECK-NEXT:    s_waitcnt vmcnt(0)
817; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
818; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off offset:16
819; CHECK-NEXT:    s_waitcnt vmcnt(0)
820; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5] offset:16
821; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
822; CHECK-NEXT:    s_setpc_b64 s[30:31]
823entry:
824  tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
825  ret void
826}
827
; 16-byte memcpy from an addrspace(4) source to an addrspace(0) destination,
; both 16-byte aligned. The expected code is a single global_load_dwordx4
; followed by a single flat_store_dwordx4.
828define void @memcpy_p0_p4_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
829; CHECK-LABEL: memcpy_p0_p4_sz16_align_16_16:
830; CHECK:       ; %bb.0: ; %entry
831; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
832; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
833; CHECK-NEXT:    s_waitcnt vmcnt(0)
834; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
835; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
836; CHECK-NEXT:    s_setpc_b64 s[30:31]
837entry:
838  tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
839  ret void
840}
841
; 31-byte memcpy from an addrspace(4) source to an addrspace(0) destination,
; both 16-byte aligned. The expected code copies two 16-byte pieces with
; global_load_dwordx4 / flat_store_dwordx4 at offsets 0 and 15, which overlap
; in byte 15; the first piece is loaded and stored before the second load is
; issued.
842define void @memcpy_p0_p4_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
843; CHECK-LABEL: memcpy_p0_p4_sz31_align_16_16:
844; CHECK:       ; %bb.0: ; %entry
845; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
846; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
847; CHECK-NEXT:    s_waitcnt vmcnt(0)
848; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
849; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off offset:15
850; CHECK-NEXT:    s_waitcnt vmcnt(0)
851; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5] offset:15
852; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
853; CHECK-NEXT:    s_setpc_b64 s[30:31]
854entry:
855  tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
856  ret void
857}
858
; 32-byte memcpy from an addrspace(4) source to an addrspace(0) destination,
; both 16-byte aligned. The expected code copies two 16-byte pieces with
; global_load_dwordx4 / flat_store_dwordx4 at offsets 0 and 16; the first piece
; is loaded and stored before the second load is issued.
859define void @memcpy_p0_p4_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
860; CHECK-LABEL: memcpy_p0_p4_sz32_align_16_16:
861; CHECK:       ; %bb.0: ; %entry
862; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
863; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
864; CHECK-NEXT:    s_waitcnt vmcnt(0)
865; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
866; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off offset:16
867; CHECK-NEXT:    s_waitcnt vmcnt(0)
868; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5] offset:16
869; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
870; CHECK-NEXT:    s_setpc_b64 s[30:31]
871entry:
872  tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
873  ret void
874}
875
; 16-byte memcpy from an addrspace(5) source to an addrspace(0) destination,
; both 1-byte aligned. The expected code reads the source with four
; buffer_load_dword instructions (offsets 0, 4, 8, 12) grouped under one
; s_clause, then writes all 16 bytes with a single flat_store_dwordx4.
876define void @memcpy_p0_p5_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
877; CHECK-LABEL: memcpy_p0_p5_sz16_align_1_1:
878; CHECK:       ; %bb.0: ; %entry
879; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
880; CHECK-NEXT:    s_clause 0x3
881; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
882; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
883; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
884; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
885; CHECK-NEXT:    s_waitcnt vmcnt(0)
886; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
887; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
888; CHECK-NEXT:    s_setpc_b64 s[30:31]
889entry:
890  tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
891  ret void
892}
893
; 31-byte memcpy from an addrspace(5) source to an addrspace(0) destination,
; both 1-byte aligned. The expected code issues nine buffer loads under one
; s_clause (seven dwords, a ushort at offset 28 and a ubyte at offset 30) and
; then stores the data as flat_store_short (offset 28), flat_store_byte
; (offset 30), flat_store_dwordx3 (offset 16) and flat_store_dwordx4 (offset 0).
894define void @memcpy_p0_p5_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
895; CHECK-LABEL: memcpy_p0_p5_sz31_align_1_1:
896; CHECK:       ; %bb.0: ; %entry
897; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
898; CHECK-NEXT:    s_clause 0x8
899; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
900; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
901; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
902; CHECK-NEXT:    buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
903; CHECK-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
904; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
905; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
906; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
907; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
908; CHECK-NEXT:    s_waitcnt vmcnt(5)
909; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
910; CHECK-NEXT:    s_waitcnt vmcnt(4)
911; CHECK-NEXT:    flat_store_byte v[0:1], v11 offset:30
912; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[7:9] offset:16
913; CHECK-NEXT:    s_waitcnt vmcnt(0)
914; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
915; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
916; CHECK-NEXT:    s_setpc_b64 s[30:31]
917entry:
918  tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
919  ret void
920}
921
; 32-byte memcpy from an addrspace(5) source to an addrspace(0) destination,
; both 1-byte aligned. The expected code issues eight buffer_load_dword
; instructions under one s_clause, the upper 16 bytes (offsets 16-28) first,
; and then two flat_store_dwordx4 stores at offsets 16 and 0.
922define void @memcpy_p0_p5_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
923; CHECK-LABEL: memcpy_p0_p5_sz32_align_1_1:
924; CHECK:       ; %bb.0: ; %entry
925; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
926; CHECK-NEXT:    s_clause 0x7
927; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen offset:16
928; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:20
929; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:24
930; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:28
931; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen
932; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:4
933; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:8
934; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:12
935; CHECK-NEXT:    s_waitcnt vmcnt(4)
936; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:16
937; CHECK-NEXT:    s_waitcnt vmcnt(0)
938; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
939; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
940; CHECK-NEXT:    s_setpc_b64 s[30:31]
941entry:
942  tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
943  ret void
944}
945
; 16-byte memcpy from an addrspace(5) source to an addrspace(0) destination,
; both 2-byte aligned. The expected code reads the source with four
; buffer_load_dword instructions (offsets 0, 4, 8, 12) grouped under one
; s_clause, then writes all 16 bytes with a single flat_store_dwordx4.
946define void @memcpy_p0_p5_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
947; CHECK-LABEL: memcpy_p0_p5_sz16_align_2_2:
948; CHECK:       ; %bb.0: ; %entry
949; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
950; CHECK-NEXT:    s_clause 0x3
951; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
952; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
953; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
954; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
955; CHECK-NEXT:    s_waitcnt vmcnt(0)
956; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
957; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
958; CHECK-NEXT:    s_setpc_b64 s[30:31]
959entry:
960  tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
961  ret void
962}
963
; 31-byte memcpy from an addrspace(5) source to an addrspace(0) destination,
; both 2-byte aligned. The expected code issues nine buffer loads under one
; s_clause (seven dwords, a ushort at offset 28 and a ubyte at offset 30) and
; then stores the data as flat_store_short (offset 28), flat_store_byte
; (offset 30), flat_store_dwordx3 (offset 16) and flat_store_dwordx4 (offset 0).
964define void @memcpy_p0_p5_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
965; CHECK-LABEL: memcpy_p0_p5_sz31_align_2_2:
966; CHECK:       ; %bb.0: ; %entry
967; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
968; CHECK-NEXT:    s_clause 0x8
969; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
970; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
971; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
972; CHECK-NEXT:    buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
973; CHECK-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
974; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
975; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
976; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
977; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
978; CHECK-NEXT:    s_waitcnt vmcnt(5)
979; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
980; CHECK-NEXT:    s_waitcnt vmcnt(4)
981; CHECK-NEXT:    flat_store_byte v[0:1], v11 offset:30
982; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[7:9] offset:16
983; CHECK-NEXT:    s_waitcnt vmcnt(0)
984; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
985; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
986; CHECK-NEXT:    s_setpc_b64 s[30:31]
987entry:
988  tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
989  ret void
990}
991
; 32-byte memcpy from an addrspace(5) source to an addrspace(0) destination,
; both 2-byte aligned. The expected code issues eight buffer_load_dword
; instructions under one s_clause, the upper 16 bytes (offsets 16-28) first,
; and then two flat_store_dwordx4 stores at offsets 16 and 0.
992define void @memcpy_p0_p5_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
993; CHECK-LABEL: memcpy_p0_p5_sz32_align_2_2:
994; CHECK:       ; %bb.0: ; %entry
995; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
996; CHECK-NEXT:    s_clause 0x7
997; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen offset:16
998; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:20
999; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:24
1000; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:28
1001; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen
1002; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:4
1003; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:8
1004; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:12
1005; CHECK-NEXT:    s_waitcnt vmcnt(4)
1006; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:16
1007; CHECK-NEXT:    s_waitcnt vmcnt(0)
1008; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
1009; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1010; CHECK-NEXT:    s_setpc_b64 s[30:31]
1011entry:
1012  tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
1013  ret void
1014}
1015
; 16-byte memcpy from an addrspace(5) source to an addrspace(0) destination,
; both 8-byte aligned. The expected code reads the source with four
; buffer_load_dword instructions (offsets 0, 4, 8, 12) grouped under one
; s_clause, then writes all 16 bytes with a single flat_store_dwordx4.
1016define void @memcpy_p0_p5_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
1017; CHECK-LABEL: memcpy_p0_p5_sz16_align_8_8:
1018; CHECK:       ; %bb.0: ; %entry
1019; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1020; CHECK-NEXT:    s_clause 0x3
1021; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1022; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1023; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1024; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1025; CHECK-NEXT:    s_waitcnt vmcnt(0)
1026; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1027; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1028; CHECK-NEXT:    s_setpc_b64 s[30:31]
1029entry:
1030  tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
1031  ret void
1032}
1033
; 31-byte memcpy from an addrspace(5) source to an addrspace(0) destination,
; both 8-byte aligned. The expected code issues eight buffer_load_dword
; instructions under one s_clause, at offsets 0, 4, 8, 12 and 15, 19, 23, 27,
; and then two flat_store_dwordx4 stores at offsets 0 and 15. The two 16-byte
; pieces overlap in byte 15.
1034define void @memcpy_p0_p5_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
1035; CHECK-LABEL: memcpy_p0_p5_sz31_align_8_8:
1036; CHECK:       ; %bb.0: ; %entry
1037; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1038; CHECK-NEXT:    s_clause 0x7
1039; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1040; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1041; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1042; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1043; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
1044; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
1045; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
1046; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
1047; CHECK-NEXT:    s_waitcnt vmcnt(4)
1048; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1049; CHECK-NEXT:    s_waitcnt vmcnt(0)
1050; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10] offset:15
1051; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1052; CHECK-NEXT:    s_setpc_b64 s[30:31]
1053entry:
1054  tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
1055  ret void
1056}
1057
; 32-byte memcpy from an addrspace(5) source to an addrspace(0) destination,
; both 8-byte aligned. The expected code issues eight buffer_load_dword
; instructions under one s_clause at offsets 0 through 28, and then two
; flat_store_dwordx4 stores at offsets 0 and 16.
1058define void @memcpy_p0_p5_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
1059; CHECK-LABEL: memcpy_p0_p5_sz32_align_8_8:
1060; CHECK:       ; %bb.0: ; %entry
1061; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1062; CHECK-NEXT:    s_clause 0x7
1063; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1064; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1065; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1066; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1067; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
1068; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
1069; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
1070; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
1071; CHECK-NEXT:    s_waitcnt vmcnt(4)
1072; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1073; CHECK-NEXT:    s_waitcnt vmcnt(0)
1074; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10] offset:16
1075; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1076; CHECK-NEXT:    s_setpc_b64 s[30:31]
1077entry:
1078  tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
1079  ret void
1080}
1081
; 16-byte memcpy from an addrspace(5) source to an addrspace(0) destination,
; both 16-byte aligned. The expected code reads the source with four
; buffer_load_dword instructions (offsets 0, 4, 8, 12) grouped under one
; s_clause, then writes all 16 bytes with a single flat_store_dwordx4.
1082define void @memcpy_p0_p5_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
1083; CHECK-LABEL: memcpy_p0_p5_sz16_align_16_16:
1084; CHECK:       ; %bb.0: ; %entry
1085; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1086; CHECK-NEXT:    s_clause 0x3
1087; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1088; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1089; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1090; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1091; CHECK-NEXT:    s_waitcnt vmcnt(0)
1092; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1093; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1094; CHECK-NEXT:    s_setpc_b64 s[30:31]
1095entry:
1096  tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
1097  ret void
1098}
1099
; 31-byte memcpy from an addrspace(5) source to an addrspace(0) destination,
; both 16-byte aligned. The expected code issues eight buffer_load_dword
; instructions under one s_clause, at offsets 0, 4, 8, 12 and 15, 19, 23, 27,
; and then two flat_store_dwordx4 stores at offsets 0 and 15. The two 16-byte
; pieces overlap in byte 15.
1100define void @memcpy_p0_p5_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
1101; CHECK-LABEL: memcpy_p0_p5_sz31_align_16_16:
1102; CHECK:       ; %bb.0: ; %entry
1103; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1104; CHECK-NEXT:    s_clause 0x7
1105; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1106; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1107; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1108; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1109; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
1110; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
1111; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
1112; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
1113; CHECK-NEXT:    s_waitcnt vmcnt(4)
1114; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1115; CHECK-NEXT:    s_waitcnt vmcnt(0)
1116; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10] offset:15
1117; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1118; CHECK-NEXT:    s_setpc_b64 s[30:31]
1119entry:
1120  tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
1121  ret void
1122}
1123
; 32-byte memcpy from an addrspace(5) source to an addrspace(0) destination,
; both 16-byte aligned. The expected code issues eight buffer_load_dword
; instructions under one s_clause at offsets 0 through 28, and then two
; flat_store_dwordx4 stores at offsets 0 and 16.
1124define void @memcpy_p0_p5_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
1125; CHECK-LABEL: memcpy_p0_p5_sz32_align_16_16:
1126; CHECK:       ; %bb.0: ; %entry
1127; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1128; CHECK-NEXT:    s_clause 0x7
1129; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1130; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1131; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1132; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1133; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
1134; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
1135; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
1136; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
1137; CHECK-NEXT:    s_waitcnt vmcnt(4)
1138; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1139; CHECK-NEXT:    s_waitcnt vmcnt(0)
1140; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10] offset:16
1141; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1142; CHECK-NEXT:    s_setpc_b64 s[30:31]
1143entry:
1144  tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
1145  ret void
1146}
1147
; 16-byte memcpy from an addrspace(0) source to an addrspace(1) destination,
; both 1-byte aligned. The expected code is a single flat_load_dwordx4
; followed by a single global_store_dwordx4.
1148define void @memcpy_p1_p0_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
1149; CHECK-LABEL: memcpy_p1_p0_sz16_align_1_1:
1150; CHECK:       ; %bb.0: ; %entry
1151; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1152; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
1153; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1154; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1155; CHECK-NEXT:    s_setpc_b64 s[30:31]
1156entry:
1157  tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
1158  ret void
1159}
1160
; 31-byte memcpy from an addrspace(0) source to an addrspace(1) destination,
; both 1-byte aligned. The expected code issues three flat loads under one
; s_clause (dwordx2 at offsets 23 and 16, dwordx4 at offset 0) and stores each
; piece with the matching global store at the same offset. The two 8-byte
; pieces overlap in byte 23.
1161define void @memcpy_p1_p0_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
1162; CHECK-LABEL: memcpy_p1_p0_sz31_align_1_1:
1163; CHECK:       ; %bb.0: ; %entry
1164; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1165; CHECK-NEXT:    s_clause 0x2
1166; CHECK-NEXT:    flat_load_dwordx2 v[6:7], v[2:3] offset:23
1167; CHECK-NEXT:    flat_load_dwordx2 v[8:9], v[2:3] offset:16
1168; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
1169; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
1170; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:23
1171; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1172; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[8:9], off offset:16
1173; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1174; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1175; CHECK-NEXT:    s_setpc_b64 s[30:31]
1176entry:
1177  tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
1178  ret void
1179}
1180
; 32-byte memcpy from an addrspace(0) source to an addrspace(1) destination,
; both 1-byte aligned. The expected code issues two flat_load_dwordx4 loads
; under one s_clause (offsets 16 and 0) and stores each piece with
; global_store_dwordx4 at the same offset.
1181define void @memcpy_p1_p0_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
1182; CHECK-LABEL: memcpy_p1_p0_sz32_align_1_1:
1183; CHECK:       ; %bb.0: ; %entry
1184; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1185; CHECK-NEXT:    s_clause 0x1
1186; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
1187; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
1188; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1189; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1190; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1191; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1192; CHECK-NEXT:    s_setpc_b64 s[30:31]
1193entry:
1194  tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
1195  ret void
1196}
1197
; 16-byte memcpy from an addrspace(0) source to an addrspace(1) destination,
; both 2-byte aligned. The expected code is a single flat_load_dwordx4
; followed by a single global_store_dwordx4.
1198define void @memcpy_p1_p0_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
1199; CHECK-LABEL: memcpy_p1_p0_sz16_align_2_2:
1200; CHECK:       ; %bb.0: ; %entry
1201; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1202; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
1203; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1204; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1205; CHECK-NEXT:    s_setpc_b64 s[30:31]
1206entry:
1207  tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
1208  ret void
1209}
1210
; 31-byte memcpy from an addrspace(0) source to an addrspace(1) destination,
; both 2-byte aligned. The expected code issues three flat loads under one
; s_clause (dwordx2 at offsets 23 and 16, dwordx4 at offset 0) and stores each
; piece with the matching global store at the same offset. The two 8-byte
; pieces overlap in byte 23.
1211define void @memcpy_p1_p0_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
1212; CHECK-LABEL: memcpy_p1_p0_sz31_align_2_2:
1213; CHECK:       ; %bb.0: ; %entry
1214; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1215; CHECK-NEXT:    s_clause 0x2
1216; CHECK-NEXT:    flat_load_dwordx2 v[6:7], v[2:3] offset:23
1217; CHECK-NEXT:    flat_load_dwordx2 v[8:9], v[2:3] offset:16
1218; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
1219; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
1220; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:23
1221; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1222; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[8:9], off offset:16
1223; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1224; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1225; CHECK-NEXT:    s_setpc_b64 s[30:31]
1226entry:
1227  tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
1228  ret void
1229}
1230
; 32-byte memcpy from an addrspace(0) source to an addrspace(1) destination,
; both 2-byte aligned. The expected code issues two flat_load_dwordx4 loads
; under one s_clause (offsets 16 and 0) and stores each piece with
; global_store_dwordx4 at the same offset.
1231define void @memcpy_p1_p0_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
1232; CHECK-LABEL: memcpy_p1_p0_sz32_align_2_2:
1233; CHECK:       ; %bb.0: ; %entry
1234; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1235; CHECK-NEXT:    s_clause 0x1
1236; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
1237; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
1238; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1239; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1240; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1241; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1242; CHECK-NEXT:    s_setpc_b64 s[30:31]
1243entry:
1244  tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
1245  ret void
1246}
1247
; 16-byte memcpy from an addrspace(0) source to an addrspace(1) destination,
; both 8-byte aligned. The expected code is a single flat_load_dwordx4
; followed by a single global_store_dwordx4.
1248define void @memcpy_p1_p0_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
1249; CHECK-LABEL: memcpy_p1_p0_sz16_align_8_8:
1250; CHECK:       ; %bb.0: ; %entry
1251; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1252; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
1253; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1254; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1255; CHECK-NEXT:    s_setpc_b64 s[30:31]
1256entry:
1257  tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
1258  ret void
1259}
1260
; 31-byte memcpy from addrspace(0) to addrspace(1), both pointers align 8.
; Expected lowering: two 16-byte flat loads at offsets 15 and 0, each followed
; by a 16-byte global store to the same offset. The offset-15 chunk overlaps
; the first chunk by one byte so that the copy ends exactly at byte 31.
1261define void @memcpy_p1_p0_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
1262; CHECK-LABEL: memcpy_p1_p0_sz31_align_8_8:
1263; CHECK:       ; %bb.0: ; %entry
1264; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1265; CHECK-NEXT:    s_clause 0x1
1266; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:15
1267; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
1268; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1269; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:15
1270; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1271; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1272; CHECK-NEXT:    s_setpc_b64 s[30:31]
1273entry:
1274  tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
1275  ret void
1276}
1277
; 32-byte memcpy from addrspace(0) to addrspace(1), both pointers align 8.
; Expected lowering: two 16-byte flat loads (offsets 16 and 0), each followed
; by a 16-byte global store to the same offset.
1278define void @memcpy_p1_p0_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
1279; CHECK-LABEL: memcpy_p1_p0_sz32_align_8_8:
1280; CHECK:       ; %bb.0: ; %entry
1281; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1282; CHECK-NEXT:    s_clause 0x1
1283; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
1284; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
1285; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1286; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1287; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1288; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1289; CHECK-NEXT:    s_setpc_b64 s[30:31]
1290entry:
1291  tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
1292  ret void
1293}
1294
; 16-byte memcpy from addrspace(0) to addrspace(1), both pointers align 16.
; Expected lowering: a single 16-byte flat load followed by a single 16-byte
; global store.
1295define void @memcpy_p1_p0_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
1296; CHECK-LABEL: memcpy_p1_p0_sz16_align_16_16:
1297; CHECK:       ; %bb.0: ; %entry
1298; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1299; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
1300; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1301; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1302; CHECK-NEXT:    s_setpc_b64 s[30:31]
1303entry:
1304  tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
1305  ret void
1306}
1307
; 31-byte memcpy from addrspace(0) to addrspace(1), both pointers align 16.
; Expected lowering: two 16-byte flat loads at offsets 15 and 0, each followed
; by a 16-byte global store to the same offset. The offset-15 chunk overlaps
; the first chunk by one byte so that the copy ends exactly at byte 31.
1308define void @memcpy_p1_p0_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
1309; CHECK-LABEL: memcpy_p1_p0_sz31_align_16_16:
1310; CHECK:       ; %bb.0: ; %entry
1311; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1312; CHECK-NEXT:    s_clause 0x1
1313; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:15
1314; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
1315; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1316; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:15
1317; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1318; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1319; CHECK-NEXT:    s_setpc_b64 s[30:31]
1320entry:
1321  tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
1322  ret void
1323}
1324
; 32-byte memcpy from addrspace(0) to addrspace(1), both pointers align 16.
; Expected lowering: two 16-byte flat loads (offsets 16 and 0), each followed
; by a 16-byte global store to the same offset.
1325define void @memcpy_p1_p0_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
1326; CHECK-LABEL: memcpy_p1_p0_sz32_align_16_16:
1327; CHECK:       ; %bb.0: ; %entry
1328; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1329; CHECK-NEXT:    s_clause 0x1
1330; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
1331; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
1332; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1333; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1334; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1335; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1336; CHECK-NEXT:    s_setpc_b64 s[30:31]
1337entry:
1338  tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
1339  ret void
1340}
1341
; 16-byte memcpy from addrspace(1) to addrspace(1), both pointers align 1.
; Expected lowering: a single 16-byte global load followed by a single 16-byte
; global store, even though only byte alignment is declared.
1342define void @memcpy_p1_p1_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
1343; CHECK-LABEL: memcpy_p1_p1_sz16_align_1_1:
1344; CHECK:       ; %bb.0: ; %entry
1345; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1346; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1347; CHECK-NEXT:    s_waitcnt vmcnt(0)
1348; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1349; CHECK-NEXT:    s_setpc_b64 s[30:31]
1350entry:
1351  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
1352  ret void
1353}
1354
; 31-byte memcpy from addrspace(1) to addrspace(1), both pointers align 1.
; Expected lowering: global loads of 8 bytes at offset 23, 16 bytes at offset 0
; and 8 bytes at offset 16, each matched by a global store at the same offset.
; The offset-23 chunk overlaps the offset-16 chunk by one byte. The last load
; writes its result into v[2:3], the registers that held the source pointer.
1355define void @memcpy_p1_p1_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
1356; CHECK-LABEL: memcpy_p1_p1_sz31_align_1_1:
1357; CHECK:       ; %bb.0: ; %entry
1358; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1359; CHECK-NEXT:    s_clause 0x2
1360; CHECK-NEXT:    global_load_dwordx2 v[8:9], v[2:3], off offset:23
1361; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
1362; CHECK-NEXT:    global_load_dwordx2 v[2:3], v[2:3], off offset:16
1363; CHECK-NEXT:    s_waitcnt vmcnt(2)
1364; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[8:9], off offset:23
1365; CHECK-NEXT:    s_waitcnt vmcnt(1)
1366; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1367; CHECK-NEXT:    s_waitcnt vmcnt(0)
1368; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off offset:16
1369; CHECK-NEXT:    s_setpc_b64 s[30:31]
1370entry:
1371  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
1372  ret void
1373}
1374
; 32-byte memcpy from addrspace(1) to addrspace(1), both pointers align 1.
; Expected lowering: two 16-byte global loads (offsets 16 and 0), each followed
; by a 16-byte global store to the same offset.
1375define void @memcpy_p1_p1_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
1376; CHECK-LABEL: memcpy_p1_p1_sz32_align_1_1:
1377; CHECK:       ; %bb.0: ; %entry
1378; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1379; CHECK-NEXT:    s_clause 0x1
1380; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
1381; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
1382; CHECK-NEXT:    s_waitcnt vmcnt(1)
1383; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1384; CHECK-NEXT:    s_waitcnt vmcnt(0)
1385; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1386; CHECK-NEXT:    s_setpc_b64 s[30:31]
1387entry:
1388  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
1389  ret void
1390}
1391
; 16-byte memcpy from addrspace(1) to addrspace(1), both pointers align 2.
; Expected lowering: a single 16-byte global load followed by a single 16-byte
; global store.
1392define void @memcpy_p1_p1_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
1393; CHECK-LABEL: memcpy_p1_p1_sz16_align_2_2:
1394; CHECK:       ; %bb.0: ; %entry
1395; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1396; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1397; CHECK-NEXT:    s_waitcnt vmcnt(0)
1398; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1399; CHECK-NEXT:    s_setpc_b64 s[30:31]
1400entry:
1401  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
1402  ret void
1403}
1404
; 31-byte memcpy from addrspace(1) to addrspace(1), both pointers align 2.
; Expected lowering: global loads of 8 bytes at offset 23, 16 bytes at offset 0
; and 8 bytes at offset 16, each matched by a global store at the same offset.
; The offset-23 chunk overlaps the offset-16 chunk by one byte. The last load
; writes its result into v[2:3], the registers that held the source pointer.
1405define void @memcpy_p1_p1_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
1406; CHECK-LABEL: memcpy_p1_p1_sz31_align_2_2:
1407; CHECK:       ; %bb.0: ; %entry
1408; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1409; CHECK-NEXT:    s_clause 0x2
1410; CHECK-NEXT:    global_load_dwordx2 v[8:9], v[2:3], off offset:23
1411; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
1412; CHECK-NEXT:    global_load_dwordx2 v[2:3], v[2:3], off offset:16
1413; CHECK-NEXT:    s_waitcnt vmcnt(2)
1414; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[8:9], off offset:23
1415; CHECK-NEXT:    s_waitcnt vmcnt(1)
1416; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1417; CHECK-NEXT:    s_waitcnt vmcnt(0)
1418; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off offset:16
1419; CHECK-NEXT:    s_setpc_b64 s[30:31]
1420entry:
1421  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
1422  ret void
1423}
1424
; 32-byte memcpy from addrspace(1) to addrspace(1), both pointers align 2.
; Expected lowering: two 16-byte global loads (offsets 16 and 0), each followed
; by a 16-byte global store to the same offset.
1425define void @memcpy_p1_p1_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
1426; CHECK-LABEL: memcpy_p1_p1_sz32_align_2_2:
1427; CHECK:       ; %bb.0: ; %entry
1428; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1429; CHECK-NEXT:    s_clause 0x1
1430; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
1431; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
1432; CHECK-NEXT:    s_waitcnt vmcnt(1)
1433; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1434; CHECK-NEXT:    s_waitcnt vmcnt(0)
1435; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1436; CHECK-NEXT:    s_setpc_b64 s[30:31]
1437entry:
1438  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
1439  ret void
1440}
1441
; 16-byte memcpy from addrspace(1) to addrspace(1), both pointers align 8.
; Expected lowering: a single 16-byte global load followed by a single 16-byte
; global store.
1442define void @memcpy_p1_p1_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
1443; CHECK-LABEL: memcpy_p1_p1_sz16_align_8_8:
1444; CHECK:       ; %bb.0: ; %entry
1445; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1446; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1447; CHECK-NEXT:    s_waitcnt vmcnt(0)
1448; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1449; CHECK-NEXT:    s_setpc_b64 s[30:31]
1450entry:
1451  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
1452  ret void
1453}
1454
; 31-byte memcpy from addrspace(1) to addrspace(1), both pointers align 8.
; Expected lowering: two 16-byte global loads at offsets 15 and 0, each
; followed by a 16-byte global store to the same offset. The offset-15 chunk
; overlaps the first chunk by one byte so the copy ends exactly at byte 31.
1455define void @memcpy_p1_p1_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
1456; CHECK-LABEL: memcpy_p1_p1_sz31_align_8_8:
1457; CHECK:       ; %bb.0: ; %entry
1458; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1459; CHECK-NEXT:    s_clause 0x1
1460; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:15
1461; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
1462; CHECK-NEXT:    s_waitcnt vmcnt(1)
1463; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:15
1464; CHECK-NEXT:    s_waitcnt vmcnt(0)
1465; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1466; CHECK-NEXT:    s_setpc_b64 s[30:31]
1467entry:
1468  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
1469  ret void
1470}
1471
; 32-byte memcpy from addrspace(1) to addrspace(1), both pointers align 8.
; Expected lowering: two 16-byte global loads (offsets 16 and 0), each followed
; by a 16-byte global store to the same offset.
1472define void @memcpy_p1_p1_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
1473; CHECK-LABEL: memcpy_p1_p1_sz32_align_8_8:
1474; CHECK:       ; %bb.0: ; %entry
1475; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1476; CHECK-NEXT:    s_clause 0x1
1477; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
1478; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
1479; CHECK-NEXT:    s_waitcnt vmcnt(1)
1480; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1481; CHECK-NEXT:    s_waitcnt vmcnt(0)
1482; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1483; CHECK-NEXT:    s_setpc_b64 s[30:31]
1484entry:
1485  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
1486  ret void
1487}
1488
; 16-byte memcpy from addrspace(1) to addrspace(1), both pointers align 16.
; Expected lowering: a single 16-byte global load followed by a single 16-byte
; global store.
1489define void @memcpy_p1_p1_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
1490; CHECK-LABEL: memcpy_p1_p1_sz16_align_16_16:
1491; CHECK:       ; %bb.0: ; %entry
1492; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1493; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1494; CHECK-NEXT:    s_waitcnt vmcnt(0)
1495; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1496; CHECK-NEXT:    s_setpc_b64 s[30:31]
1497entry:
1498  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
1499  ret void
1500}
1501
; 31-byte memcpy from addrspace(1) to addrspace(1), both pointers align 16.
; Expected lowering: two 16-byte global loads at offsets 15 and 0, each
; followed by a 16-byte global store to the same offset. The offset-15 chunk
; overlaps the first chunk by one byte so the copy ends exactly at byte 31.
1502define void @memcpy_p1_p1_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
1503; CHECK-LABEL: memcpy_p1_p1_sz31_align_16_16:
1504; CHECK:       ; %bb.0: ; %entry
1505; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1506; CHECK-NEXT:    s_clause 0x1
1507; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:15
1508; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
1509; CHECK-NEXT:    s_waitcnt vmcnt(1)
1510; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:15
1511; CHECK-NEXT:    s_waitcnt vmcnt(0)
1512; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1513; CHECK-NEXT:    s_setpc_b64 s[30:31]
1514entry:
1515  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
1516  ret void
1517}
1518
; 32-byte memcpy from addrspace(1) to addrspace(1), both pointers align 16.
; Expected lowering: two 16-byte global loads (offsets 16 and 0), each followed
; by a 16-byte global store to the same offset.
1519define void @memcpy_p1_p1_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
1520; CHECK-LABEL: memcpy_p1_p1_sz32_align_16_16:
1521; CHECK:       ; %bb.0: ; %entry
1522; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1523; CHECK-NEXT:    s_clause 0x1
1524; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
1525; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
1526; CHECK-NEXT:    s_waitcnt vmcnt(1)
1527; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1528; CHECK-NEXT:    s_waitcnt vmcnt(0)
1529; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1530; CHECK-NEXT:    s_setpc_b64 s[30:31]
1531entry:
1532  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
1533  ret void
1534}
1535
; 16-byte memcpy from addrspace(3) to addrspace(1), both pointers align 1.
; Expected lowering: a single 128-bit ds_read of the source followed by a
; single 16-byte global store. Only lgkmcnt is waited on before the store.
1536define void @memcpy_p1_p3_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
1537; CHECK-LABEL: memcpy_p1_p3_sz16_align_1_1:
1538; CHECK:       ; %bb.0: ; %entry
1539; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1540; CHECK-NEXT:    ds_read_b128 v[2:5], v2
1541; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1542; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1543; CHECK-NEXT:    s_setpc_b64 s[30:31]
1544entry:
1545  tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
1546  ret void
1547}
1548
; 31-byte memcpy from addrspace(3) to addrspace(1), both pointers align 1.
; Expected lowering: ds_read of 64 bits at offset 0, 128 bits at offset 8 and
; 64 bits at offset 23, each matched by a global store at the same offset.
; The offset-23 chunk overlaps the offset-8 chunk by one byte so that the
; copy ends exactly at byte 31.
1549define void @memcpy_p1_p3_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
1550; CHECK-LABEL: memcpy_p1_p3_sz31_align_1_1:
1551; CHECK:       ; %bb.0: ; %entry
1552; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1553; CHECK-NEXT:    ds_read_b64 v[7:8], v2
1554; CHECK-NEXT:    ds_read_b128 v[3:6], v2 offset:8
1555; CHECK-NEXT:    ds_read_b64 v[9:10], v2 offset:23
1556; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
1557; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[7:8], off
1558; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1559; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:8
1560; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1561; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[9:10], off offset:23
1562; CHECK-NEXT:    s_setpc_b64 s[30:31]
1563entry:
1564  tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
1565  ret void
1566}
1567
; 32-byte memcpy from addrspace(3) to addrspace(1), both pointers align 1.
; Expected lowering: two 128-bit ds_reads (offsets 0 and 16), each followed by
; a 16-byte global store to the same offset.
1568define void @memcpy_p1_p3_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
1569; CHECK-LABEL: memcpy_p1_p3_sz32_align_1_1:
1570; CHECK:       ; %bb.0: ; %entry
1571; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1572; CHECK-NEXT:    ds_read_b128 v[3:6], v2
1573; CHECK-NEXT:    ds_read_b128 v[7:10], v2 offset:16
1574; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1575; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1576; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1577; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
1578; CHECK-NEXT:    s_setpc_b64 s[30:31]
1579entry:
1580  tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
1581  ret void
1582}
1583
; 16-byte memcpy from addrspace(3) to addrspace(1), both pointers align 2.
; Expected lowering: a single 128-bit ds_read of the source followed by a
; single 16-byte global store.
1584define void @memcpy_p1_p3_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
1585; CHECK-LABEL: memcpy_p1_p3_sz16_align_2_2:
1586; CHECK:       ; %bb.0: ; %entry
1587; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1588; CHECK-NEXT:    ds_read_b128 v[2:5], v2
1589; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1590; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1591; CHECK-NEXT:    s_setpc_b64 s[30:31]
1592entry:
1593  tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
1594  ret void
1595}
1596
; 31-byte memcpy from addrspace(3) to addrspace(1), both pointers align 2.
; Expected lowering: ds_read of 64 bits at offset 0, 128 bits at offset 8 and
; 64 bits at offset 23, each matched by a global store at the same offset.
; The offset-23 chunk overlaps the offset-8 chunk by one byte so that the
; copy ends exactly at byte 31.
1597define void @memcpy_p1_p3_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
1598; CHECK-LABEL: memcpy_p1_p3_sz31_align_2_2:
1599; CHECK:       ; %bb.0: ; %entry
1600; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1601; CHECK-NEXT:    ds_read_b64 v[7:8], v2
1602; CHECK-NEXT:    ds_read_b128 v[3:6], v2 offset:8
1603; CHECK-NEXT:    ds_read_b64 v[9:10], v2 offset:23
1604; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
1605; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[7:8], off
1606; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1607; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:8
1608; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1609; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[9:10], off offset:23
1610; CHECK-NEXT:    s_setpc_b64 s[30:31]
1611entry:
1612  tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
1613  ret void
1614}
1615
; 32-byte memcpy from addrspace(3) to addrspace(1), both pointers align 2.
; Expected lowering: two 128-bit ds_reads (offsets 0 and 16), each followed by
; a 16-byte global store to the same offset.
1616define void @memcpy_p1_p3_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
1617; CHECK-LABEL: memcpy_p1_p3_sz32_align_2_2:
1618; CHECK:       ; %bb.0: ; %entry
1619; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1620; CHECK-NEXT:    ds_read_b128 v[3:6], v2
1621; CHECK-NEXT:    ds_read_b128 v[7:10], v2 offset:16
1622; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1623; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1624; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1625; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
1626; CHECK-NEXT:    s_setpc_b64 s[30:31]
1627entry:
1628  tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
1629  ret void
1630}
1631
; 16-byte memcpy from addrspace(3) to addrspace(1), both pointers align 8.
; Expected lowering: unlike the align 1/2/16 variants, the source is read with
; ds_read2_b64 (two 64-bit elements, offset1:1) rather than ds_read_b128,
; followed by a single 16-byte global store.
1632define void @memcpy_p1_p3_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
1633; CHECK-LABEL: memcpy_p1_p3_sz16_align_8_8:
1634; CHECK:       ; %bb.0: ; %entry
1635; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1636; CHECK-NEXT:    ds_read2_b64 v[2:5], v2 offset1:1
1637; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1638; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1639; CHECK-NEXT:    s_setpc_b64 s[30:31]
1640entry:
1641  tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
1642  ret void
1643}
1644
; 31-byte memcpy from addrspace(3) to addrspace(1), both pointers align 8.
; Expected lowering: the first 16 bytes are read with ds_read2_b64
; (offset1:1), the tail with a 128-bit ds_read at offset 15; each is followed
; by a 16-byte global store (offsets 0 and 15). The offset-15 chunk overlaps
; the first chunk by one byte so that the copy ends exactly at byte 31.
1645define void @memcpy_p1_p3_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
1646; CHECK-LABEL: memcpy_p1_p3_sz31_align_8_8:
1647; CHECK:       ; %bb.0: ; %entry
1648; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1649; CHECK-NEXT:    ds_read2_b64 v[3:6], v2 offset1:1
1650; CHECK-NEXT:    ds_read_b128 v[7:10], v2 offset:15
1651; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1652; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1653; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1654; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:15
1655; CHECK-NEXT:    s_setpc_b64 s[30:31]
1656entry:
1657  tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
1658  ret void
1659}
1660
; 32-byte memcpy from addrspace(3) to addrspace(1), both pointers align 8.
; Expected lowering: two ds_read2_b64 reads (offset1:1, then
; offset0:2 offset1:3), each followed by a 16-byte global store (byte offsets
; 0 and 16 respectively).
1661define void @memcpy_p1_p3_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
1662; CHECK-LABEL: memcpy_p1_p3_sz32_align_8_8:
1663; CHECK:       ; %bb.0: ; %entry
1664; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1665; CHECK-NEXT:    ds_read2_b64 v[3:6], v2 offset1:1
1666; CHECK-NEXT:    ds_read2_b64 v[7:10], v2 offset0:2 offset1:3
1667; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1668; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1669; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1670; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
1671; CHECK-NEXT:    s_setpc_b64 s[30:31]
1672entry:
1673  tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
1674  ret void
1675}
1676
; 16-byte memcpy from addrspace(3) to addrspace(1), both pointers align 16.
; Expected lowering: a single 128-bit ds_read of the source followed by a
; single 16-byte global store.
1677define void @memcpy_p1_p3_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
1678; CHECK-LABEL: memcpy_p1_p3_sz16_align_16_16:
1679; CHECK:       ; %bb.0: ; %entry
1680; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1681; CHECK-NEXT:    ds_read_b128 v[2:5], v2
1682; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1683; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1684; CHECK-NEXT:    s_setpc_b64 s[30:31]
1685entry:
1686  tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
1687  ret void
1688}
1689
; 31-byte memcpy from addrspace(3) to addrspace(1), both pointers align 16.
; Expected lowering: two 128-bit ds_reads at offsets 0 and 15, each followed
; by a 16-byte global store to the same offset. The offset-15 chunk overlaps
; the first chunk by one byte so that the copy ends exactly at byte 31.
1690define void @memcpy_p1_p3_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
1691; CHECK-LABEL: memcpy_p1_p3_sz31_align_16_16:
1692; CHECK:       ; %bb.0: ; %entry
1693; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1694; CHECK-NEXT:    ds_read_b128 v[3:6], v2
1695; CHECK-NEXT:    ds_read_b128 v[7:10], v2 offset:15
1696; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1697; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1698; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1699; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:15
1700; CHECK-NEXT:    s_setpc_b64 s[30:31]
1701entry:
1702  tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
1703  ret void
1704}
1705
; 32-byte memcpy from addrspace(3) to addrspace(1), both pointers align 16.
; Expected lowering: two 128-bit ds_reads (offsets 0 and 16), each followed by
; a 16-byte global store to the same offset.
1706define void @memcpy_p1_p3_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
1707; CHECK-LABEL: memcpy_p1_p3_sz32_align_16_16:
1708; CHECK:       ; %bb.0: ; %entry
1709; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1710; CHECK-NEXT:    ds_read_b128 v[3:6], v2
1711; CHECK-NEXT:    ds_read_b128 v[7:10], v2 offset:16
1712; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1713; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1714; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1715; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
1716; CHECK-NEXT:    s_setpc_b64 s[30:31]
1717entry:
1718  tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
1719  ret void
1720}
1721
; 16-byte memcpy from addrspace(4) to addrspace(1), both pointers align 1.
; Expected lowering: the addrspace(4) source is read with a single 16-byte
; global load, followed by a single 16-byte global store.
1722define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
1723; CHECK-LABEL: memcpy_p1_p4_sz16_align_1_1:
1724; CHECK:       ; %bb.0: ; %entry
1725; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1726; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1727; CHECK-NEXT:    s_waitcnt vmcnt(0)
1728; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1729; CHECK-NEXT:    s_setpc_b64 s[30:31]
1730entry:
1731  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
1732  ret void
1733}
1734
; 31-byte memcpy from addrspace(4) to addrspace(1), both pointers align 1.
; Expected lowering: global loads of 8 bytes at offset 0 and 16 bytes at
; offset 8 are issued together and stored; the final 8-byte load at offset 23
; is issued only after those two stores and is then stored at offset 23.
; The offset-23 chunk overlaps the offset-8 chunk by one byte so that the
; copy ends exactly at byte 31.
1735define void @memcpy_p1_p4_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
1736; CHECK-LABEL: memcpy_p1_p4_sz31_align_1_1:
1737; CHECK:       ; %bb.0: ; %entry
1738; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1739; CHECK-NEXT:    s_clause 0x1
1740; CHECK-NEXT:    global_load_dwordx2 v[8:9], v[2:3], off
1741; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:8
1742; CHECK-NEXT:    s_waitcnt vmcnt(1)
1743; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[8:9], off
1744; CHECK-NEXT:    s_waitcnt vmcnt(0)
1745; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:8
1746; CHECK-NEXT:    global_load_dwordx2 v[2:3], v[2:3], off offset:23
1747; CHECK-NEXT:    s_waitcnt vmcnt(0)
1748; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off offset:23
1749; CHECK-NEXT:    s_setpc_b64 s[30:31]
1750entry:
1751  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
1752  ret void
1753}
1754
; 32-byte memcpy from addrspace(4) to addrspace(1), both pointers align 1.
; Expected lowering: two 16-byte global loads (offsets 0 and 16) issued
; together, each followed by a 16-byte global store to the same offset.
1755define void @memcpy_p1_p4_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
1756; CHECK-LABEL: memcpy_p1_p4_sz32_align_1_1:
1757; CHECK:       ; %bb.0: ; %entry
1758; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1759; CHECK-NEXT:    s_clause 0x1
1760; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
1761; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off offset:16
1762; CHECK-NEXT:    s_waitcnt vmcnt(1)
1763; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1764; CHECK-NEXT:    s_waitcnt vmcnt(0)
1765; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off offset:16
1766; CHECK-NEXT:    s_setpc_b64 s[30:31]
1767entry:
1768  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
1769  ret void
1770}
1771
; 16-byte memcpy from addrspace(4) to addrspace(1), both pointers align 2.
; Expected lowering: a single 16-byte global load followed by a single 16-byte
; global store.
1772define void @memcpy_p1_p4_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
1773; CHECK-LABEL: memcpy_p1_p4_sz16_align_2_2:
1774; CHECK:       ; %bb.0: ; %entry
1775; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1776; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1777; CHECK-NEXT:    s_waitcnt vmcnt(0)
1778; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1779; CHECK-NEXT:    s_setpc_b64 s[30:31]
1780entry:
1781  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
1782  ret void
1783}
1784
; 31-byte memcpy from addrspace(4) to addrspace(1), both pointers align 2.
; Expected lowering: global loads of 8 bytes at offset 0 and 16 bytes at
; offset 8 are issued together and stored; the final 8-byte load at offset 23
; is issued only after those two stores and is then stored at offset 23.
; The offset-23 chunk overlaps the offset-8 chunk by one byte so that the
; copy ends exactly at byte 31.
1785define void @memcpy_p1_p4_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
1786; CHECK-LABEL: memcpy_p1_p4_sz31_align_2_2:
1787; CHECK:       ; %bb.0: ; %entry
1788; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1789; CHECK-NEXT:    s_clause 0x1
1790; CHECK-NEXT:    global_load_dwordx2 v[8:9], v[2:3], off
1791; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:8
1792; CHECK-NEXT:    s_waitcnt vmcnt(1)
1793; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[8:9], off
1794; CHECK-NEXT:    s_waitcnt vmcnt(0)
1795; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:8
1796; CHECK-NEXT:    global_load_dwordx2 v[2:3], v[2:3], off offset:23
1797; CHECK-NEXT:    s_waitcnt vmcnt(0)
1798; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off offset:23
1799; CHECK-NEXT:    s_setpc_b64 s[30:31]
1800entry:
1801  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
1802  ret void
1803}
1804
; 32-byte memcpy from addrspace(4) to addrspace(1), both pointers align 2.
; Expected lowering: two 16-byte global loads (offsets 0 and 16) issued
; together, each followed by a 16-byte global store to the same offset.
1805define void @memcpy_p1_p4_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
1806; CHECK-LABEL: memcpy_p1_p4_sz32_align_2_2:
1807; CHECK:       ; %bb.0: ; %entry
1808; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1809; CHECK-NEXT:    s_clause 0x1
1810; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
1811; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off offset:16
1812; CHECK-NEXT:    s_waitcnt vmcnt(1)
1813; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1814; CHECK-NEXT:    s_waitcnt vmcnt(0)
1815; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off offset:16
1816; CHECK-NEXT:    s_setpc_b64 s[30:31]
1817entry:
1818  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
1819  ret void
1820}
1821
; 16-byte memcpy from addrspace(4) to addrspace(1), both pointers align 8.
; Expected lowering: a single 16-byte global load followed by a single 16-byte
; global store.
1822define void @memcpy_p1_p4_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
1823; CHECK-LABEL: memcpy_p1_p4_sz16_align_8_8:
1824; CHECK:       ; %bb.0: ; %entry
1825; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1826; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1827; CHECK-NEXT:    s_waitcnt vmcnt(0)
1828; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1829; CHECK-NEXT:    s_setpc_b64 s[30:31]
1830entry:
1831  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
1832  ret void
1833}
1834
; 31-byte memcpy from addrspace(4) to addrspace(1), both pointers align 8.
; Expected lowering: the two 16-byte chunks (offsets 0 and 15) are copied one
; after the other: load, wait, store, then load, wait, store. There is no
; s_clause grouping the loads here. The offset-15 chunk overlaps the first
; chunk by one byte so that the copy ends exactly at byte 31.
1835define void @memcpy_p1_p4_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
1836; CHECK-LABEL: memcpy_p1_p4_sz31_align_8_8:
1837; CHECK:       ; %bb.0: ; %entry
1838; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1839; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
1840; CHECK-NEXT:    s_waitcnt vmcnt(0)
1841; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1842; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off offset:15
1843; CHECK-NEXT:    s_waitcnt vmcnt(0)
1844; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:15
1845; CHECK-NEXT:    s_setpc_b64 s[30:31]
1846entry:
1847  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
1848  ret void
1849}
1850
; 32-byte memcpy from addrspace(4) to addrspace(1), both pointers align 8.
; Expected lowering: the two 16-byte chunks (offsets 0 and 16) are copied one
; after the other: load, wait, store, then load, wait, store. There is no
; s_clause grouping the loads here.
1851define void @memcpy_p1_p4_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
1852; CHECK-LABEL: memcpy_p1_p4_sz32_align_8_8:
1853; CHECK:       ; %bb.0: ; %entry
1854; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1855; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
1856; CHECK-NEXT:    s_waitcnt vmcnt(0)
1857; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1858; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off offset:16
1859; CHECK-NEXT:    s_waitcnt vmcnt(0)
1860; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:16
1861; CHECK-NEXT:    s_setpc_b64 s[30:31]
1862entry:
1863  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
1864  ret void
1865}
1866
; 16-byte memcpy from addrspace(4) to addrspace(1), both pointers align 16.
; Expected lowering: a single 16-byte global load followed by a single 16-byte
; global store.
1867define void @memcpy_p1_p4_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
1868; CHECK-LABEL: memcpy_p1_p4_sz16_align_16_16:
1869; CHECK:       ; %bb.0: ; %entry
1870; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1871; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1872; CHECK-NEXT:    s_waitcnt vmcnt(0)
1873; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1874; CHECK-NEXT:    s_setpc_b64 s[30:31]
1875entry:
1876  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
1877  ret void
1878}
1879
; Copies 31 bytes from addrspace(4) to addrspace(1); both pointers are 16-byte aligned.
; Expected gfx1030 output: two global_load_dwordx4/global_store_dwordx4 pairs at
; offsets 0 and 15; the second 16-byte piece overlaps the first by one byte so
; that the copy ends exactly at byte 31.
1880define void @memcpy_p1_p4_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
1881; CHECK-LABEL: memcpy_p1_p4_sz31_align_16_16:
1882; CHECK:       ; %bb.0: ; %entry
1883; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1884; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
1885; CHECK-NEXT:    s_waitcnt vmcnt(0)
1886; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1887; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off offset:15
1888; CHECK-NEXT:    s_waitcnt vmcnt(0)
1889; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:15
1890; CHECK-NEXT:    s_setpc_b64 s[30:31]
1891entry:
1892  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
1893  ret void
1894}
1895
; Copies 32 bytes from addrspace(4) to addrspace(1); both pointers are 16-byte aligned.
; Expected gfx1030 output: two global_load_dwordx4/global_store_dwordx4 pairs at
; offsets 0 and 16, each load followed by a wait before its store.
1896define void @memcpy_p1_p4_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
1897; CHECK-LABEL: memcpy_p1_p4_sz32_align_16_16:
1898; CHECK:       ; %bb.0: ; %entry
1899; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1900; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
1901; CHECK-NEXT:    s_waitcnt vmcnt(0)
1902; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1903; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off offset:16
1904; CHECK-NEXT:    s_waitcnt vmcnt(0)
1905; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:16
1906; CHECK-NEXT:    s_setpc_b64 s[30:31]
1907entry:
1908  tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
1909  ret void
1910}
1911
; Copies 16 bytes from addrspace(5) to addrspace(1); both pointers are 1-byte aligned.
; Expected gfx1030 output: four buffer_load_dword (offsets 0, 4, 8, 12) grouped
; in one s_clause, then a single global_store_dwordx4 once all loads completed.
1912define void @memcpy_p1_p5_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
1913; CHECK-LABEL: memcpy_p1_p5_sz16_align_1_1:
1914; CHECK:       ; %bb.0: ; %entry
1915; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1916; CHECK-NEXT:    s_clause 0x3
1917; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1918; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1919; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1920; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1921; CHECK-NEXT:    s_waitcnt vmcnt(0)
1922; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1923; CHECK-NEXT:    s_setpc_b64 s[30:31]
1924entry:
1925  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
1926  ret void
1927}
1928
; Copies 31 bytes from addrspace(5) to addrspace(1); both pointers are 1-byte aligned.
; Expected gfx1030 output: eight buffer_load_dword in one s_clause (offsets 0..20
; in steps of 4, then 23 and 27), stored as global_store_dwordx4 at 0,
; global_store_dwordx2 at 16 and global_store_dwordx2 at 23. The last 8-byte
; piece overlaps the previous one by one byte so the copy ends at byte 31.
1929define void @memcpy_p1_p5_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
1930; CHECK-LABEL: memcpy_p1_p5_sz31_align_1_1:
1931; CHECK:       ; %bb.0: ; %entry
1932; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1933; CHECK-NEXT:    s_clause 0x7
1934; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1935; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1936; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1937; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1938; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
1939; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
1940; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
1941; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
1942; CHECK-NEXT:    s_waitcnt vmcnt(4)
1943; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1944; CHECK-NEXT:    s_waitcnt vmcnt(2)
1945; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[7:8], off offset:16
1946; CHECK-NEXT:    s_waitcnt vmcnt(0)
1947; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[9:10], off offset:23
1948; CHECK-NEXT:    s_setpc_b64 s[30:31]
1949entry:
1950  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
1951  ret void
1952}
1953
; Copies 32 bytes from addrspace(5) to addrspace(1); both pointers are 1-byte aligned.
; Expected gfx1030 output: eight buffer_load_dword in one s_clause (offsets 0..28
; in steps of 4), stored as two global_store_dwordx4 at offsets 0 and 16; each
; store waits only for the four loads that feed it.
1954define void @memcpy_p1_p5_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
1955; CHECK-LABEL: memcpy_p1_p5_sz32_align_1_1:
1956; CHECK:       ; %bb.0: ; %entry
1957; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1958; CHECK-NEXT:    s_clause 0x7
1959; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1960; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1961; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1962; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1963; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
1964; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
1965; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
1966; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
1967; CHECK-NEXT:    s_waitcnt vmcnt(4)
1968; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1969; CHECK-NEXT:    s_waitcnt vmcnt(0)
1970; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
1971; CHECK-NEXT:    s_setpc_b64 s[30:31]
1972entry:
1973  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
1974  ret void
1975}
1976
; Copies 16 bytes from addrspace(5) to addrspace(1); both pointers are 2-byte aligned.
; Expected gfx1030 output: four buffer_load_dword (offsets 0, 4, 8, 12) grouped
; in one s_clause, then a single global_store_dwordx4 once all loads completed.
1977define void @memcpy_p1_p5_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
1978; CHECK-LABEL: memcpy_p1_p5_sz16_align_2_2:
1979; CHECK:       ; %bb.0: ; %entry
1980; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1981; CHECK-NEXT:    s_clause 0x3
1982; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1983; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1984; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1985; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1986; CHECK-NEXT:    s_waitcnt vmcnt(0)
1987; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1988; CHECK-NEXT:    s_setpc_b64 s[30:31]
1989entry:
1990  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
1991  ret void
1992}
1993
; Copies 31 bytes from addrspace(5) to addrspace(1); both pointers are 2-byte aligned.
; Expected gfx1030 output: eight buffer_load_dword in one s_clause (offsets 0..20
; in steps of 4, then 23 and 27), stored as global_store_dwordx4 at 0,
; global_store_dwordx2 at 16 and global_store_dwordx2 at 23. The last 8-byte
; piece overlaps the previous one by one byte so the copy ends at byte 31.
1994define void @memcpy_p1_p5_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
1995; CHECK-LABEL: memcpy_p1_p5_sz31_align_2_2:
1996; CHECK:       ; %bb.0: ; %entry
1997; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1998; CHECK-NEXT:    s_clause 0x7
1999; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2000; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2001; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2002; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2003; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2004; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2005; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
2006; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
2007; CHECK-NEXT:    s_waitcnt vmcnt(4)
2008; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2009; CHECK-NEXT:    s_waitcnt vmcnt(2)
2010; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[7:8], off offset:16
2011; CHECK-NEXT:    s_waitcnt vmcnt(0)
2012; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[9:10], off offset:23
2013; CHECK-NEXT:    s_setpc_b64 s[30:31]
2014entry:
2015  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
2016  ret void
2017}
2018
; Copies 32 bytes from addrspace(5) to addrspace(1); both pointers are 2-byte aligned.
; Expected gfx1030 output: eight buffer_load_dword in one s_clause (offsets 0..28
; in steps of 4), stored as two global_store_dwordx4 at offsets 0 and 16; each
; store waits only for the four loads that feed it.
2019define void @memcpy_p1_p5_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
2020; CHECK-LABEL: memcpy_p1_p5_sz32_align_2_2:
2021; CHECK:       ; %bb.0: ; %entry
2022; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2023; CHECK-NEXT:    s_clause 0x7
2024; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2025; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2026; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2027; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2028; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2029; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2030; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2031; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
2032; CHECK-NEXT:    s_waitcnt vmcnt(4)
2033; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2034; CHECK-NEXT:    s_waitcnt vmcnt(0)
2035; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
2036; CHECK-NEXT:    s_setpc_b64 s[30:31]
2037entry:
2038  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
2039  ret void
2040}
2041
; Copies 16 bytes from addrspace(5) to addrspace(1); both pointers are 8-byte aligned.
; Expected gfx1030 output: four buffer_load_dword (offsets 0, 4, 8, 12) grouped
; in one s_clause, then a single global_store_dwordx4 once all loads completed.
2042define void @memcpy_p1_p5_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
2043; CHECK-LABEL: memcpy_p1_p5_sz16_align_8_8:
2044; CHECK:       ; %bb.0: ; %entry
2045; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2046; CHECK-NEXT:    s_clause 0x3
2047; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2048; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2049; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2050; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2051; CHECK-NEXT:    s_waitcnt vmcnt(0)
2052; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2053; CHECK-NEXT:    s_setpc_b64 s[30:31]
2054entry:
2055  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
2056  ret void
2057}
2058
; Copies 31 bytes from addrspace(5) to addrspace(1); both pointers are 8-byte aligned.
; Expected gfx1030 output: eight buffer_load_dword in one s_clause (offsets 0, 4,
; 8, 12 and then 15, 19, 23, 27), stored as two global_store_dwordx4 at offsets
; 0 and 15. The second 16-byte piece overlaps the first by one byte so the copy
; ends at byte 31.
2059define void @memcpy_p1_p5_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
2060; CHECK-LABEL: memcpy_p1_p5_sz31_align_8_8:
2061; CHECK:       ; %bb.0: ; %entry
2062; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2063; CHECK-NEXT:    s_clause 0x7
2064; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2065; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2066; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2067; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2068; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
2069; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
2070; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
2071; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
2072; CHECK-NEXT:    s_waitcnt vmcnt(4)
2073; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2074; CHECK-NEXT:    s_waitcnt vmcnt(0)
2075; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:15
2076; CHECK-NEXT:    s_setpc_b64 s[30:31]
2077entry:
2078  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
2079  ret void
2080}
2081
; Copies 32 bytes from addrspace(5) to addrspace(1); both pointers are 8-byte aligned.
; Expected gfx1030 output: eight buffer_load_dword in one s_clause (offsets 0..28
; in steps of 4), stored as two global_store_dwordx4 at offsets 0 and 16; each
; store waits only for the four loads that feed it.
2082define void @memcpy_p1_p5_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
2083; CHECK-LABEL: memcpy_p1_p5_sz32_align_8_8:
2084; CHECK:       ; %bb.0: ; %entry
2085; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2086; CHECK-NEXT:    s_clause 0x7
2087; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2088; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2089; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2090; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2091; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2092; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2093; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2094; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
2095; CHECK-NEXT:    s_waitcnt vmcnt(4)
2096; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2097; CHECK-NEXT:    s_waitcnt vmcnt(0)
2098; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
2099; CHECK-NEXT:    s_setpc_b64 s[30:31]
2100entry:
2101  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
2102  ret void
2103}
2104
; Copies 16 bytes from addrspace(5) to addrspace(1); both pointers are 16-byte aligned.
; Expected gfx1030 output: four buffer_load_dword (offsets 0, 4, 8, 12) grouped
; in one s_clause, then a single global_store_dwordx4 once all loads completed.
2105define void @memcpy_p1_p5_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
2106; CHECK-LABEL: memcpy_p1_p5_sz16_align_16_16:
2107; CHECK:       ; %bb.0: ; %entry
2108; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2109; CHECK-NEXT:    s_clause 0x3
2110; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2111; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2112; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2113; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2114; CHECK-NEXT:    s_waitcnt vmcnt(0)
2115; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2116; CHECK-NEXT:    s_setpc_b64 s[30:31]
2117entry:
2118  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
2119  ret void
2120}
2121
; Copies 31 bytes from addrspace(5) to addrspace(1); both pointers are 16-byte aligned.
; Expected gfx1030 output: eight buffer_load_dword in one s_clause (offsets 0, 4,
; 8, 12 and then 15, 19, 23, 27), stored as two global_store_dwordx4 at offsets
; 0 and 15. The second 16-byte piece overlaps the first by one byte so the copy
; ends at byte 31.
2122define void @memcpy_p1_p5_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
2123; CHECK-LABEL: memcpy_p1_p5_sz31_align_16_16:
2124; CHECK:       ; %bb.0: ; %entry
2125; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2126; CHECK-NEXT:    s_clause 0x7
2127; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2128; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2129; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2130; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2131; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
2132; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
2133; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
2134; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
2135; CHECK-NEXT:    s_waitcnt vmcnt(4)
2136; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2137; CHECK-NEXT:    s_waitcnt vmcnt(0)
2138; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:15
2139; CHECK-NEXT:    s_setpc_b64 s[30:31]
2140entry:
2141  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
2142  ret void
2143}
2144
; Copies 32 bytes from addrspace(5) to addrspace(1); both pointers are 16-byte aligned.
; Expected gfx1030 output: eight buffer_load_dword in one s_clause (offsets 0..28
; in steps of 4), stored as two global_store_dwordx4 at offsets 0 and 16; each
; store waits only for the four loads that feed it.
2145define void @memcpy_p1_p5_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
2146; CHECK-LABEL: memcpy_p1_p5_sz32_align_16_16:
2147; CHECK:       ; %bb.0: ; %entry
2148; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2149; CHECK-NEXT:    s_clause 0x7
2150; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2151; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2152; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2153; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2154; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2155; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2156; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2157; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
2158; CHECK-NEXT:    s_waitcnt vmcnt(4)
2159; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2160; CHECK-NEXT:    s_waitcnt vmcnt(0)
2161; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
2162; CHECK-NEXT:    s_setpc_b64 s[30:31]
2163entry:
2164  tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
2165  ret void
2166}
2167
; Copies 16 bytes from addrspace(0) to addrspace(3); both pointers are 1-byte aligned.
; Expected gfx1030 output: one flat_load_dwordx4, then one ds_write2_b64 that
; writes the two 64-bit halves, followed by a final wait on lgkmcnt before return.
2168define void @memcpy_p3_p0_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
2169; CHECK-LABEL: memcpy_p3_p0_sz16_align_1_1:
2170; CHECK:       ; %bb.0: ; %entry
2171; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2172; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
2173; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2174; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2175; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2176; CHECK-NEXT:    s_setpc_b64 s[30:31]
2177entry:
2178  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
2179  ret void
2180}
2181
; Copies 31 bytes from addrspace(0) to addrspace(3); both pointers are 1-byte aligned.
; Expected gfx1030 output: three flat loads in one s_clause (dwordx2 at 23,
; dwordx2 at 16, dwordx4 at 0), written back in the same order with
; ds_write_b64 at 23, ds_write_b64 at 16 and ds_write2_b64 for the first 16
; bytes. The piece at offset 23 overlaps the one at 16 by one byte.
2182define void @memcpy_p3_p0_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
2183; CHECK-LABEL: memcpy_p3_p0_sz31_align_1_1:
2184; CHECK:       ; %bb.0: ; %entry
2185; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2186; CHECK-NEXT:    s_clause 0x2
2187; CHECK-NEXT:    flat_load_dwordx2 v[5:6], v[1:2] offset:23
2188; CHECK-NEXT:    flat_load_dwordx2 v[7:8], v[1:2] offset:16
2189; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
2190; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
2191; CHECK-NEXT:    ds_write_b64 v0, v[5:6] offset:23
2192; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(2)
2193; CHECK-NEXT:    ds_write_b64 v0, v[7:8] offset:16
2194; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(2)
2195; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2196; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2197; CHECK-NEXT:    s_setpc_b64 s[30:31]
2198entry:
2199  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
2200  ret void
2201}
2202
; Copies 32 bytes from addrspace(0) to addrspace(3); both pointers are 1-byte aligned.
; Expected gfx1030 output: two flat_load_dwordx4 in one s_clause (offset 16
; first, then offset 0), each written back with a ds_write2_b64
; (offset0:2 offset1:3 for the upper half, offset1:1 for the lower half).
2203define void @memcpy_p3_p0_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
2204; CHECK-LABEL: memcpy_p3_p0_sz32_align_1_1:
2205; CHECK:       ; %bb.0: ; %entry
2206; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2207; CHECK-NEXT:    s_clause 0x1
2208; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
2209; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
2210; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
2211; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
2212; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
2213; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
2214; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2215; CHECK-NEXT:    s_setpc_b64 s[30:31]
2216entry:
2217  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
2218  ret void
2219}
2220
; Copies 16 bytes from addrspace(0) to addrspace(3); both pointers are 2-byte aligned.
; Expected gfx1030 output: one flat_load_dwordx4, then one ds_write2_b64 that
; writes the two 64-bit halves, followed by a final wait on lgkmcnt before return.
2221define void @memcpy_p3_p0_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
2222; CHECK-LABEL: memcpy_p3_p0_sz16_align_2_2:
2223; CHECK:       ; %bb.0: ; %entry
2224; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2225; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
2226; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2227; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2228; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2229; CHECK-NEXT:    s_setpc_b64 s[30:31]
2230entry:
2231  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
2232  ret void
2233}
2234
; Copies 31 bytes from addrspace(0) to addrspace(3); both pointers are 2-byte aligned.
; Expected gfx1030 output: three flat loads in one s_clause (dwordx2 at 23,
; dwordx2 at 16, dwordx4 at 0), written back in the same order with
; ds_write_b64 at 23, ds_write_b64 at 16 and ds_write2_b64 for the first 16
; bytes. The piece at offset 23 overlaps the one at 16 by one byte.
2235define void @memcpy_p3_p0_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
2236; CHECK-LABEL: memcpy_p3_p0_sz31_align_2_2:
2237; CHECK:       ; %bb.0: ; %entry
2238; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2239; CHECK-NEXT:    s_clause 0x2
2240; CHECK-NEXT:    flat_load_dwordx2 v[5:6], v[1:2] offset:23
2241; CHECK-NEXT:    flat_load_dwordx2 v[7:8], v[1:2] offset:16
2242; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
2243; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
2244; CHECK-NEXT:    ds_write_b64 v0, v[5:6] offset:23
2245; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(2)
2246; CHECK-NEXT:    ds_write_b64 v0, v[7:8] offset:16
2247; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(2)
2248; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2249; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2250; CHECK-NEXT:    s_setpc_b64 s[30:31]
2251entry:
2252  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
2253  ret void
2254}
2255
; Copies 32 bytes from addrspace(0) to addrspace(3); both pointers are 2-byte aligned.
; Expected gfx1030 output: two flat_load_dwordx4 in one s_clause (offset 16
; first, then offset 0), each written back with a ds_write2_b64
; (offset0:2 offset1:3 for the upper half, offset1:1 for the lower half).
2256define void @memcpy_p3_p0_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
2257; CHECK-LABEL: memcpy_p3_p0_sz32_align_2_2:
2258; CHECK:       ; %bb.0: ; %entry
2259; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2260; CHECK-NEXT:    s_clause 0x1
2261; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
2262; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
2263; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
2264; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
2265; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
2266; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
2267; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2268; CHECK-NEXT:    s_setpc_b64 s[30:31]
2269entry:
2270  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
2271  ret void
2272}
2273
; Copies 16 bytes from addrspace(0) to addrspace(3); both pointers are 8-byte aligned.
; Expected gfx1030 output: one flat_load_dwordx4, then one ds_write2_b64 that
; writes the two 64-bit halves, followed by a final wait on lgkmcnt before return.
2274define void @memcpy_p3_p0_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
2275; CHECK-LABEL: memcpy_p3_p0_sz16_align_8_8:
2276; CHECK:       ; %bb.0: ; %entry
2277; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2278; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
2279; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2280; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2281; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2282; CHECK-NEXT:    s_setpc_b64 s[30:31]
2283entry:
2284  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
2285  ret void
2286}
2287
; Copies 31 bytes from addrspace(0) to addrspace(3); both pointers are 8-byte aligned.
; Expected gfx1030 output: two flat_load_dwordx4 in one s_clause (offset 0
; first, then offset 15). The first 16 bytes are written with ds_write2_b64 and
; the tail with ds_write_b128 at offset 15, which overlaps the first piece by
; one byte so the copy ends at byte 31.
2288define void @memcpy_p3_p0_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
2289; CHECK-LABEL: memcpy_p3_p0_sz31_align_8_8:
2290; CHECK:       ; %bb.0: ; %entry
2291; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2292; CHECK-NEXT:    s_clause 0x1
2293; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2]
2294; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2] offset:15
2295; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
2296; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2297; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
2298; CHECK-NEXT:    ds_write_b128 v0, v[7:10] offset:15
2299; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2300; CHECK-NEXT:    s_setpc_b64 s[30:31]
2301entry:
2302  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
2303  ret void
2304}
2305
; Copies 32 bytes from addrspace(0) to addrspace(3); both pointers are 8-byte aligned.
; Expected gfx1030 output: two flat_load_dwordx4 in one s_clause (offset 16
; first, then offset 0), each written back with a ds_write2_b64
; (offset0:2 offset1:3 for the upper half, offset1:1 for the lower half).
2306define void @memcpy_p3_p0_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
2307; CHECK-LABEL: memcpy_p3_p0_sz32_align_8_8:
2308; CHECK:       ; %bb.0: ; %entry
2309; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2310; CHECK-NEXT:    s_clause 0x1
2311; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
2312; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
2313; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
2314; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
2315; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
2316; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
2317; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2318; CHECK-NEXT:    s_setpc_b64 s[30:31]
2319entry:
2320  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
2321  ret void
2322}
2323
; Copies 16 bytes from addrspace(0) to addrspace(3); both pointers are 16-byte aligned.
; Expected gfx1030 output: one flat_load_dwordx4 followed by a single
; ds_write_b128 (the lower-alignment variants of this copy use ds_write2_b64).
2324define void @memcpy_p3_p0_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
2325; CHECK-LABEL: memcpy_p3_p0_sz16_align_16_16:
2326; CHECK:       ; %bb.0: ; %entry
2327; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2328; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
2329; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2330; CHECK-NEXT:    ds_write_b128 v0, v[1:4]
2331; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2332; CHECK-NEXT:    s_setpc_b64 s[30:31]
2333entry:
2334  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
2335  ret void
2336}
2337
; Copies 31 bytes from addrspace(0) to addrspace(3); both pointers are 16-byte aligned.
; Expected gfx1030 output: two flat_load_dwordx4 in one s_clause (offset 15
; first, then offset 0), each written back with ds_write_b128 at the matching
; offset. The piece at offset 15 overlaps the first 16 bytes by one byte so the
; copy ends at byte 31.
2338define void @memcpy_p3_p0_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
2339; CHECK-LABEL: memcpy_p3_p0_sz31_align_16_16:
2340; CHECK:       ; %bb.0: ; %entry
2341; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2342; CHECK-NEXT:    s_clause 0x1
2343; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:15
2344; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
2345; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
2346; CHECK-NEXT:    ds_write_b128 v0, v[3:6] offset:15
2347; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
2348; CHECK-NEXT:    ds_write_b128 v0, v[7:10]
2349; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2350; CHECK-NEXT:    s_setpc_b64 s[30:31]
2351entry:
2352  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
2353  ret void
2354}
2355
; Copies 32 bytes from addrspace(0) to addrspace(3); both pointers are 16-byte aligned.
; Expected gfx1030 output: two flat_load_dwordx4 in one s_clause (offset 16
; first, then offset 0), each written back with ds_write_b128 at the matching
; offset.
2356define void @memcpy_p3_p0_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
2357; CHECK-LABEL: memcpy_p3_p0_sz32_align_16_16:
2358; CHECK:       ; %bb.0: ; %entry
2359; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2360; CHECK-NEXT:    s_clause 0x1
2361; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
2362; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
2363; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
2364; CHECK-NEXT:    ds_write_b128 v0, v[3:6] offset:16
2365; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
2366; CHECK-NEXT:    ds_write_b128 v0, v[7:10]
2367; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2368; CHECK-NEXT:    s_setpc_b64 s[30:31]
2369entry:
2370  tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
2371  ret void
2372}
2373
; Copies 16 bytes from addrspace(1) to addrspace(3); both pointers are 1-byte aligned.
; Expected gfx1030 output: one global_load_dwordx4, then one ds_write2_b64 that
; writes the two 64-bit halves, followed by a final wait on lgkmcnt before return.
2374define void @memcpy_p3_p1_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
2375; CHECK-LABEL: memcpy_p3_p1_sz16_align_1_1:
2376; CHECK:       ; %bb.0: ; %entry
2377; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2378; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
2379; CHECK-NEXT:    s_waitcnt vmcnt(0)
2380; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2381; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2382; CHECK-NEXT:    s_setpc_b64 s[30:31]
2383entry:
2384  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
2385  ret void
2386}
2387
; Copies 31 bytes from addrspace(1) to addrspace(3); both pointers are 1-byte aligned.
; Expected gfx1030 output: three global loads in one s_clause (dwordx4 at 0,
; dwordx2 at 16, dwordx2 at 23), written back in the same order with
; ds_write2_b64, ds_write_b64 at 16 and ds_write_b64 at 23. The piece at offset
; 23 overlaps the one at 16 by one byte so the copy ends at byte 31.
2388define void @memcpy_p3_p1_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
2389; CHECK-LABEL: memcpy_p3_p1_sz31_align_1_1:
2390; CHECK:       ; %bb.0: ; %entry
2391; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2392; CHECK-NEXT:    s_clause 0x2
2393; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2394; CHECK-NEXT:    global_load_dwordx2 v[7:8], v[1:2], off offset:16
2395; CHECK-NEXT:    global_load_dwordx2 v[1:2], v[1:2], off offset:23
2396; CHECK-NEXT:    s_waitcnt vmcnt(2)
2397; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2398; CHECK-NEXT:    s_waitcnt vmcnt(1)
2399; CHECK-NEXT:    ds_write_b64 v0, v[7:8] offset:16
2400; CHECK-NEXT:    s_waitcnt vmcnt(0)
2401; CHECK-NEXT:    ds_write_b64 v0, v[1:2] offset:23
2402; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2403; CHECK-NEXT:    s_setpc_b64 s[30:31]
2404entry:
2405  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
2406  ret void
2407}
2408
; Copies 32 bytes from addrspace(1) to addrspace(3); both pointers are 1-byte aligned.
; Expected gfx1030 output: two global_load_dwordx4 in one s_clause (offsets 0
; and 16), each written back with a ds_write2_b64 (offset1:1 for the lower
; half, offset0:2 offset1:3 for the upper half).
2409define void @memcpy_p3_p1_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
2410; CHECK-LABEL: memcpy_p3_p1_sz32_align_1_1:
2411; CHECK:       ; %bb.0: ; %entry
2412; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2413; CHECK-NEXT:    s_clause 0x1
2414; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2415; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
2416; CHECK-NEXT:    s_waitcnt vmcnt(1)
2417; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2418; CHECK-NEXT:    s_waitcnt vmcnt(0)
2419; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2420; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2421; CHECK-NEXT:    s_setpc_b64 s[30:31]
2422entry:
2423  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
2424  ret void
2425}
2426
; Copies 16 bytes from addrspace(1) to addrspace(3); both pointers are 2-byte aligned.
; Expected gfx1030 output: one global_load_dwordx4, then one ds_write2_b64 that
; writes the two 64-bit halves, followed by a final wait on lgkmcnt before return.
2427define void @memcpy_p3_p1_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
2428; CHECK-LABEL: memcpy_p3_p1_sz16_align_2_2:
2429; CHECK:       ; %bb.0: ; %entry
2430; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2431; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
2432; CHECK-NEXT:    s_waitcnt vmcnt(0)
2433; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2434; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2435; CHECK-NEXT:    s_setpc_b64 s[30:31]
2436entry:
2437  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
2438  ret void
2439}
2440
; Copies 31 bytes from addrspace(1) to addrspace(3); both pointers are 2-byte aligned.
; Expected gfx1030 output: three global loads in one s_clause (dwordx4 at 0,
; dwordx2 at 16, dwordx2 at 23), written back in the same order with
; ds_write2_b64, ds_write_b64 at 16 and ds_write_b64 at 23. The piece at offset
; 23 overlaps the one at 16 by one byte so the copy ends at byte 31.
2441define void @memcpy_p3_p1_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
2442; CHECK-LABEL: memcpy_p3_p1_sz31_align_2_2:
2443; CHECK:       ; %bb.0: ; %entry
2444; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2445; CHECK-NEXT:    s_clause 0x2
2446; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2447; CHECK-NEXT:    global_load_dwordx2 v[7:8], v[1:2], off offset:16
2448; CHECK-NEXT:    global_load_dwordx2 v[1:2], v[1:2], off offset:23
2449; CHECK-NEXT:    s_waitcnt vmcnt(2)
2450; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2451; CHECK-NEXT:    s_waitcnt vmcnt(1)
2452; CHECK-NEXT:    ds_write_b64 v0, v[7:8] offset:16
2453; CHECK-NEXT:    s_waitcnt vmcnt(0)
2454; CHECK-NEXT:    ds_write_b64 v0, v[1:2] offset:23
2455; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2456; CHECK-NEXT:    s_setpc_b64 s[30:31]
2457entry:
2458  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
2459  ret void
2460}
2461
; Copies 32 bytes from addrspace(1) to addrspace(3); both pointers are 2-byte aligned.
; Expected gfx1030 output: two global_load_dwordx4 in one s_clause (offsets 0
; and 16), each written back with a ds_write2_b64 (offset1:1 for the lower
; half, offset0:2 offset1:3 for the upper half).
2462define void @memcpy_p3_p1_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
2463; CHECK-LABEL: memcpy_p3_p1_sz32_align_2_2:
2464; CHECK:       ; %bb.0: ; %entry
2465; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2466; CHECK-NEXT:    s_clause 0x1
2467; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2468; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
2469; CHECK-NEXT:    s_waitcnt vmcnt(1)
2470; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2471; CHECK-NEXT:    s_waitcnt vmcnt(0)
2472; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2473; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2474; CHECK-NEXT:    s_setpc_b64 s[30:31]
2475entry:
2476  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
2477  ret void
2478}
2479
; 16-byte llvm.memcpy from addrspace(1) %src into addrspace(3) %dst, both align 8.
; Expected gfx1030 code is one global_load_dwordx4 feeding one ds_write2_b64.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2480define void @memcpy_p3_p1_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
2481; CHECK-LABEL: memcpy_p3_p1_sz16_align_8_8:
2482; CHECK:       ; %bb.0: ; %entry
2483; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2484; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
2485; CHECK-NEXT:    s_waitcnt vmcnt(0)
2486; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2487; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2488; CHECK-NEXT:    s_setpc_b64 s[30:31]
2489entry:
2490  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
2491  ret void
2492}
2493
; 31-byte llvm.memcpy from addrspace(1) %src into addrspace(3) %dst, both align 8.
; Expected gfx1030 code is two 16-byte global loads at offsets 0 and 15, stored
; with ds_write2_b64 and ds_write_b128 offset 15. The second access overlaps the
; first by one byte and ends at byte 30.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2494define void @memcpy_p3_p1_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
2495; CHECK-LABEL: memcpy_p3_p1_sz31_align_8_8:
2496; CHECK:       ; %bb.0: ; %entry
2497; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2498; CHECK-NEXT:    s_clause 0x1
2499; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2500; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:15
2501; CHECK-NEXT:    s_waitcnt vmcnt(1)
2502; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2503; CHECK-NEXT:    s_waitcnt vmcnt(0)
2504; CHECK-NEXT:    ds_write_b128 v0, v[7:10] offset:15
2505; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2506; CHECK-NEXT:    s_setpc_b64 s[30:31]
2507entry:
2508  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
2509  ret void
2510}
2511
; 32-byte llvm.memcpy from addrspace(1) %src into addrspace(3) %dst, both align 8.
; Expected gfx1030 code is a clause of two global_load_dwordx4 (offsets 0 and 16),
; each stored with a ds_write2_b64 as soon as its load has completed.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2512define void @memcpy_p3_p1_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
2513; CHECK-LABEL: memcpy_p3_p1_sz32_align_8_8:
2514; CHECK:       ; %bb.0: ; %entry
2515; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2516; CHECK-NEXT:    s_clause 0x1
2517; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2518; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
2519; CHECK-NEXT:    s_waitcnt vmcnt(1)
2520; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2521; CHECK-NEXT:    s_waitcnt vmcnt(0)
2522; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2523; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2524; CHECK-NEXT:    s_setpc_b64 s[30:31]
2525entry:
2526  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
2527  ret void
2528}
2529
; 16-byte llvm.memcpy from addrspace(1) %src into addrspace(3) %dst, both align 16.
; Expected gfx1030 code is one global_load_dwordx4 feeding a single ds_write_b128
; (the lower-alignment variants of this copy expect ds_write2_b64 instead).
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2530define void @memcpy_p3_p1_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
2531; CHECK-LABEL: memcpy_p3_p1_sz16_align_16_16:
2532; CHECK:       ; %bb.0: ; %entry
2533; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2534; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
2535; CHECK-NEXT:    s_waitcnt vmcnt(0)
2536; CHECK-NEXT:    ds_write_b128 v0, v[1:4]
2537; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2538; CHECK-NEXT:    s_setpc_b64 s[30:31]
2539entry:
2540  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
2541  ret void
2542}
2543
; 31-byte llvm.memcpy from addrspace(1) %src into addrspace(3) %dst, both align 16.
; Expected gfx1030 code is two 16-byte global loads at offsets 0 and 15, each
; stored with ds_write_b128. The second access overlaps the first by one byte
; and ends at byte 30.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2544define void @memcpy_p3_p1_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
2545; CHECK-LABEL: memcpy_p3_p1_sz31_align_16_16:
2546; CHECK:       ; %bb.0: ; %entry
2547; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2548; CHECK-NEXT:    s_clause 0x1
2549; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2550; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:15
2551; CHECK-NEXT:    s_waitcnt vmcnt(1)
2552; CHECK-NEXT:    ds_write_b128 v0, v[3:6]
2553; CHECK-NEXT:    s_waitcnt vmcnt(0)
2554; CHECK-NEXT:    ds_write_b128 v0, v[7:10] offset:15
2555; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2556; CHECK-NEXT:    s_setpc_b64 s[30:31]
2557entry:
2558  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
2559  ret void
2560}
2561
; 32-byte llvm.memcpy from addrspace(1) %src into addrspace(3) %dst, both align 16.
; Expected gfx1030 code is a clause of two global_load_dwordx4 (offsets 0 and 16),
; each stored with a ds_write_b128 at the same offset.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2562define void @memcpy_p3_p1_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
2563; CHECK-LABEL: memcpy_p3_p1_sz32_align_16_16:
2564; CHECK:       ; %bb.0: ; %entry
2565; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2566; CHECK-NEXT:    s_clause 0x1
2567; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2568; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
2569; CHECK-NEXT:    s_waitcnt vmcnt(1)
2570; CHECK-NEXT:    ds_write_b128 v0, v[3:6]
2571; CHECK-NEXT:    s_waitcnt vmcnt(0)
2572; CHECK-NEXT:    ds_write_b128 v0, v[7:10] offset:16
2573; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2574; CHECK-NEXT:    s_setpc_b64 s[30:31]
2575entry:
2576  tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
2577  ret void
2578}
2579
; 16-byte llvm.memcpy between two addrspace(3) pointers, both align 1.
; Expected gfx1030 code is one ds_read2_b64 feeding one ds_write2_b64.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2580define void @memcpy_p3_p3_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
2581; CHECK-LABEL: memcpy_p3_p3_sz16_align_1_1:
2582; CHECK:       ; %bb.0: ; %entry
2583; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2584; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
2585; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2586; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2587; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2588; CHECK-NEXT:    s_setpc_b64 s[30:31]
2589entry:
2590  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
2591  ret void
2592}
2593
; 31-byte llvm.memcpy between two addrspace(3) pointers, both align 1.
; Expected gfx1030 code reads 8 bytes at offset 23, 8 bytes at offset 16 and
; 16 bytes at offset 0, then writes them back in that same order. The access
; at offset 23 overlaps the one at offset 16 by one byte and ends at byte 30.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2594define void @memcpy_p3_p3_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
2595; CHECK-LABEL: memcpy_p3_p3_sz31_align_1_1:
2596; CHECK:       ; %bb.0: ; %entry
2597; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2598; CHECK-NEXT:    ds_read_b64 v[5:6], v1 offset:23
2599; CHECK-NEXT:    ds_read_b64 v[7:8], v1 offset:16
2600; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
2601; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
2602; CHECK-NEXT:    ds_write_b64 v0, v[5:6] offset:23
2603; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
2604; CHECK-NEXT:    ds_write_b64 v0, v[7:8] offset:16
2605; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
2606; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2607; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2608; CHECK-NEXT:    s_setpc_b64 s[30:31]
2609entry:
2610  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
2611  ret void
2612}
2613
; 32-byte llvm.memcpy between two addrspace(3) pointers, both align 1.
; Expected gfx1030 code is two ds_read2_b64 (upper 16 bytes first, then the
; lower 16 bytes) followed by two ds_write2_b64 in the same order.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2614define void @memcpy_p3_p3_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
2615; CHECK-LABEL: memcpy_p3_p3_sz32_align_1_1:
2616; CHECK:       ; %bb.0: ; %entry
2617; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2618; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
2619; CHECK-NEXT:    ds_read2_b64 v[6:9], v1 offset1:1
2620; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2621; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
2622; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2623; CHECK-NEXT:    ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
2624; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2625; CHECK-NEXT:    s_setpc_b64 s[30:31]
2626entry:
2627  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
2628  ret void
2629}
2630
; 16-byte llvm.memcpy between two addrspace(3) pointers, both align 2.
; Expected gfx1030 code is one ds_read2_b64 feeding one ds_write2_b64.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2631define void @memcpy_p3_p3_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
2632; CHECK-LABEL: memcpy_p3_p3_sz16_align_2_2:
2633; CHECK:       ; %bb.0: ; %entry
2634; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2635; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
2636; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2637; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2638; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2639; CHECK-NEXT:    s_setpc_b64 s[30:31]
2640entry:
2641  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
2642  ret void
2643}
2644
; 31-byte llvm.memcpy between two addrspace(3) pointers, both align 2.
; Expected gfx1030 code reads 8 bytes at offset 23, 8 bytes at offset 16 and
; 16 bytes at offset 0, then writes them back in that same order. The access
; at offset 23 overlaps the one at offset 16 by one byte and ends at byte 30.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2645define void @memcpy_p3_p3_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
2646; CHECK-LABEL: memcpy_p3_p3_sz31_align_2_2:
2647; CHECK:       ; %bb.0: ; %entry
2648; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2649; CHECK-NEXT:    ds_read_b64 v[5:6], v1 offset:23
2650; CHECK-NEXT:    ds_read_b64 v[7:8], v1 offset:16
2651; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
2652; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
2653; CHECK-NEXT:    ds_write_b64 v0, v[5:6] offset:23
2654; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
2655; CHECK-NEXT:    ds_write_b64 v0, v[7:8] offset:16
2656; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
2657; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2658; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2659; CHECK-NEXT:    s_setpc_b64 s[30:31]
2660entry:
2661  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
2662  ret void
2663}
2664
; 32-byte llvm.memcpy between two addrspace(3) pointers, both align 2.
; Expected gfx1030 code is two ds_read2_b64 (upper 16 bytes first, then the
; lower 16 bytes) followed by two ds_write2_b64 in the same order.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2665define void @memcpy_p3_p3_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
2666; CHECK-LABEL: memcpy_p3_p3_sz32_align_2_2:
2667; CHECK:       ; %bb.0: ; %entry
2668; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2669; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
2670; CHECK-NEXT:    ds_read2_b64 v[6:9], v1 offset1:1
2671; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2672; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
2673; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2674; CHECK-NEXT:    ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
2675; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2676; CHECK-NEXT:    s_setpc_b64 s[30:31]
2677entry:
2678  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
2679  ret void
2680}
2681
; 16-byte llvm.memcpy between two addrspace(3) pointers, both align 8.
; Expected gfx1030 code is one ds_read2_b64 feeding one ds_write2_b64.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2682define void @memcpy_p3_p3_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
2683; CHECK-LABEL: memcpy_p3_p3_sz16_align_8_8:
2684; CHECK:       ; %bb.0: ; %entry
2685; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2686; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
2687; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2688; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2689; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2690; CHECK-NEXT:    s_setpc_b64 s[30:31]
2691entry:
2692  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
2693  ret void
2694}
2695
; 31-byte llvm.memcpy between two addrspace(3) pointers, both align 8.
; Expected gfx1030 code reads 16 bytes at offset 0 (ds_read2_b64) and 16 bytes
; at offset 15 (ds_read_b128), then writes them back in that order. The second
; access overlaps the first by one byte and ends at byte 30.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2696define void @memcpy_p3_p3_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
2697; CHECK-LABEL: memcpy_p3_p3_sz31_align_8_8:
2698; CHECK:       ; %bb.0: ; %entry
2699; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2700; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset1:1
2701; CHECK-NEXT:    ds_read_b128 v[6:9], v1 offset:15
2702; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2703; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
2704; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2705; CHECK-NEXT:    ds_write_b128 v0, v[6:9] offset:15
2706; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2707; CHECK-NEXT:    s_setpc_b64 s[30:31]
2708entry:
2709  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
2710  ret void
2711}
2712
; 32-byte llvm.memcpy between two addrspace(3) pointers, both align 8.
; Expected gfx1030 code is two ds_read2_b64 (upper 16 bytes first, then the
; lower 16 bytes) followed by two ds_write2_b64 in the same order.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2713define void @memcpy_p3_p3_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
2714; CHECK-LABEL: memcpy_p3_p3_sz32_align_8_8:
2715; CHECK:       ; %bb.0: ; %entry
2716; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2717; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
2718; CHECK-NEXT:    ds_read2_b64 v[6:9], v1 offset1:1
2719; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2720; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
2721; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2722; CHECK-NEXT:    ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
2723; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2724; CHECK-NEXT:    s_setpc_b64 s[30:31]
2725entry:
2726  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
2727  ret void
2728}
2729
; 16-byte llvm.memcpy between two addrspace(3) pointers, both align 16.
; Expected gfx1030 code is a single ds_read_b128 feeding a single ds_write_b128
; (the lower-alignment variants of this copy expect the ds_*2_b64 forms instead).
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2730define void @memcpy_p3_p3_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
2731; CHECK-LABEL: memcpy_p3_p3_sz16_align_16_16:
2732; CHECK:       ; %bb.0: ; %entry
2733; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2734; CHECK-NEXT:    ds_read_b128 v[1:4], v1
2735; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2736; CHECK-NEXT:    ds_write_b128 v0, v[1:4]
2737; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2738; CHECK-NEXT:    s_setpc_b64 s[30:31]
2739entry:
2740  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
2741  ret void
2742}
2743
; 31-byte llvm.memcpy between two addrspace(3) pointers, both align 16.
; Expected gfx1030 code is two ds_read_b128 (offset 15 first, then offset 0)
; followed by two ds_write_b128 in the same order. The access at offset 15
; overlaps the one at offset 0 by one byte and ends at byte 30.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2744define void @memcpy_p3_p3_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
2745; CHECK-LABEL: memcpy_p3_p3_sz31_align_16_16:
2746; CHECK:       ; %bb.0: ; %entry
2747; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2748; CHECK-NEXT:    ds_read_b128 v[2:5], v1 offset:15
2749; CHECK-NEXT:    ds_read_b128 v[6:9], v1
2750; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2751; CHECK-NEXT:    ds_write_b128 v0, v[2:5] offset:15
2752; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2753; CHECK-NEXT:    ds_write_b128 v0, v[6:9]
2754; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2755; CHECK-NEXT:    s_setpc_b64 s[30:31]
2756entry:
2757  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
2758  ret void
2759}
2760
; 32-byte llvm.memcpy between two addrspace(3) pointers, both align 16.
; Expected gfx1030 code is two ds_read_b128 (offset 16 first, then offset 0)
; followed by two ds_write_b128 in the same order.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2761define void @memcpy_p3_p3_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
2762; CHECK-LABEL: memcpy_p3_p3_sz32_align_16_16:
2763; CHECK:       ; %bb.0: ; %entry
2764; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2765; CHECK-NEXT:    ds_read_b128 v[2:5], v1 offset:16
2766; CHECK-NEXT:    ds_read_b128 v[6:9], v1
2767; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2768; CHECK-NEXT:    ds_write_b128 v0, v[2:5] offset:16
2769; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2770; CHECK-NEXT:    ds_write_b128 v0, v[6:9]
2771; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2772; CHECK-NEXT:    s_setpc_b64 s[30:31]
2773entry:
2774  tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
2775  ret void
2776}
2777
; 16-byte llvm.memcpy from addrspace(4) %src into addrspace(3) %dst, both align 1.
; Expected gfx1030 code is one global_load_dwordx4 feeding one ds_write2_b64.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2778define void @memcpy_p3_p4_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
2779; CHECK-LABEL: memcpy_p3_p4_sz16_align_1_1:
2780; CHECK:       ; %bb.0: ; %entry
2781; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2782; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
2783; CHECK-NEXT:    s_waitcnt vmcnt(0)
2784; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2785; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2786; CHECK-NEXT:    s_setpc_b64 s[30:31]
2787entry:
2788  tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
2789  ret void
2790}
2791
; 31-byte llvm.memcpy from addrspace(4) %src into addrspace(3) %dst, both align 1.
; Expected gfx1030 code is a clause of three global loads (16 bytes at offset 0,
; 8 bytes at offset 16, 8 bytes at offset 23) with matching ds writes. The access
; at offset 23 overlaps the one at offset 16 by one byte and ends at byte 30.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2792define void @memcpy_p3_p4_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
2793; CHECK-LABEL: memcpy_p3_p4_sz31_align_1_1:
2794; CHECK:       ; %bb.0: ; %entry
2795; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2796; CHECK-NEXT:    s_clause 0x2
2797; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2798; CHECK-NEXT:    global_load_dwordx2 v[7:8], v[1:2], off offset:16
2799; CHECK-NEXT:    global_load_dwordx2 v[1:2], v[1:2], off offset:23
2800; CHECK-NEXT:    s_waitcnt vmcnt(2)
2801; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2802; CHECK-NEXT:    s_waitcnt vmcnt(1)
2803; CHECK-NEXT:    ds_write_b64 v0, v[7:8] offset:16
2804; CHECK-NEXT:    s_waitcnt vmcnt(0)
2805; CHECK-NEXT:    ds_write_b64 v0, v[1:2] offset:23
2806; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2807; CHECK-NEXT:    s_setpc_b64 s[30:31]
2808entry:
2809  tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
2810  ret void
2811}
2812
; 32-byte llvm.memcpy from addrspace(4) %src into addrspace(3) %dst, both align 1.
; Expected gfx1030 code is a clause of two global_load_dwordx4 (offsets 0 and 16),
; each stored with a ds_write2_b64 as soon as its load has completed.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2813define void @memcpy_p3_p4_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
2814; CHECK-LABEL: memcpy_p3_p4_sz32_align_1_1:
2815; CHECK:       ; %bb.0: ; %entry
2816; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2817; CHECK-NEXT:    s_clause 0x1
2818; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2819; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
2820; CHECK-NEXT:    s_waitcnt vmcnt(1)
2821; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2822; CHECK-NEXT:    s_waitcnt vmcnt(0)
2823; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2824; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2825; CHECK-NEXT:    s_setpc_b64 s[30:31]
2826entry:
2827  tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
2828  ret void
2829}
2830
; 16-byte llvm.memcpy from addrspace(4) %src into addrspace(3) %dst, both align 2.
; Expected gfx1030 code is one global_load_dwordx4 feeding one ds_write2_b64.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2831define void @memcpy_p3_p4_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
2832; CHECK-LABEL: memcpy_p3_p4_sz16_align_2_2:
2833; CHECK:       ; %bb.0: ; %entry
2834; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2835; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
2836; CHECK-NEXT:    s_waitcnt vmcnt(0)
2837; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2838; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2839; CHECK-NEXT:    s_setpc_b64 s[30:31]
2840entry:
2841  tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
2842  ret void
2843}
2844
; 31-byte llvm.memcpy from addrspace(4) %src into addrspace(3) %dst, both align 2.
; Expected gfx1030 code is a clause of three global loads (16 bytes at offset 0,
; 8 bytes at offset 16, 8 bytes at offset 23) with matching ds writes. The access
; at offset 23 overlaps the one at offset 16 by one byte and ends at byte 30.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2845define void @memcpy_p3_p4_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
2846; CHECK-LABEL: memcpy_p3_p4_sz31_align_2_2:
2847; CHECK:       ; %bb.0: ; %entry
2848; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2849; CHECK-NEXT:    s_clause 0x2
2850; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2851; CHECK-NEXT:    global_load_dwordx2 v[7:8], v[1:2], off offset:16
2852; CHECK-NEXT:    global_load_dwordx2 v[1:2], v[1:2], off offset:23
2853; CHECK-NEXT:    s_waitcnt vmcnt(2)
2854; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2855; CHECK-NEXT:    s_waitcnt vmcnt(1)
2856; CHECK-NEXT:    ds_write_b64 v0, v[7:8] offset:16
2857; CHECK-NEXT:    s_waitcnt vmcnt(0)
2858; CHECK-NEXT:    ds_write_b64 v0, v[1:2] offset:23
2859; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2860; CHECK-NEXT:    s_setpc_b64 s[30:31]
2861entry:
2862  tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
2863  ret void
2864}
2865
; 32-byte llvm.memcpy from addrspace(4) %src into addrspace(3) %dst, both align 2.
; Expected gfx1030 code is a clause of two global_load_dwordx4 (offsets 0 and 16),
; each stored with a ds_write2_b64 as soon as its load has completed.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2866define void @memcpy_p3_p4_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
2867; CHECK-LABEL: memcpy_p3_p4_sz32_align_2_2:
2868; CHECK:       ; %bb.0: ; %entry
2869; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2870; CHECK-NEXT:    s_clause 0x1
2871; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2872; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
2873; CHECK-NEXT:    s_waitcnt vmcnt(1)
2874; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2875; CHECK-NEXT:    s_waitcnt vmcnt(0)
2876; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2877; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2878; CHECK-NEXT:    s_setpc_b64 s[30:31]
2879entry:
2880  tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
2881  ret void
2882}
2883
; 16-byte llvm.memcpy from addrspace(4) %src into addrspace(3) %dst, both align 8.
; Expected gfx1030 code is one global_load_dwordx4 feeding one ds_write2_b64.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2884define void @memcpy_p3_p4_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
2885; CHECK-LABEL: memcpy_p3_p4_sz16_align_8_8:
2886; CHECK:       ; %bb.0: ; %entry
2887; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2888; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
2889; CHECK-NEXT:    s_waitcnt vmcnt(0)
2890; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2891; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2892; CHECK-NEXT:    s_setpc_b64 s[30:31]
2893entry:
2894  tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
2895  ret void
2896}
2897
; 31-byte llvm.memcpy from addrspace(4) %src into addrspace(3) %dst, both align 8.
; Expected gfx1030 code is two 16-byte global loads at offsets 0 and 15, stored
; with ds_write2_b64 and ds_write_b128 offset 15. The second access overlaps the
; first by one byte and ends at byte 30.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2898define void @memcpy_p3_p4_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
2899; CHECK-LABEL: memcpy_p3_p4_sz31_align_8_8:
2900; CHECK:       ; %bb.0: ; %entry
2901; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2902; CHECK-NEXT:    s_clause 0x1
2903; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2904; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:15
2905; CHECK-NEXT:    s_waitcnt vmcnt(1)
2906; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2907; CHECK-NEXT:    s_waitcnt vmcnt(0)
2908; CHECK-NEXT:    ds_write_b128 v0, v[7:10] offset:15
2909; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2910; CHECK-NEXT:    s_setpc_b64 s[30:31]
2911entry:
2912  tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
2913  ret void
2914}
2915
; 32-byte llvm.memcpy from addrspace(4) %src into addrspace(3) %dst, both align 8.
; Expected gfx1030 code is a clause of two global_load_dwordx4 (offsets 0 and 16),
; each stored with a ds_write2_b64 as soon as its load has completed.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2916define void @memcpy_p3_p4_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
2917; CHECK-LABEL: memcpy_p3_p4_sz32_align_8_8:
2918; CHECK:       ; %bb.0: ; %entry
2919; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2920; CHECK-NEXT:    s_clause 0x1
2921; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2922; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
2923; CHECK-NEXT:    s_waitcnt vmcnt(1)
2924; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2925; CHECK-NEXT:    s_waitcnt vmcnt(0)
2926; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2927; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2928; CHECK-NEXT:    s_setpc_b64 s[30:31]
2929entry:
2930  tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
2931  ret void
2932}
2933
; 16-byte llvm.memcpy from addrspace(4) %src into addrspace(3) %dst, both align 16.
; Expected gfx1030 code is one global_load_dwordx4 feeding a single ds_write_b128
; (the lower-alignment variants of this copy expect ds_write2_b64 instead).
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2934define void @memcpy_p3_p4_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
2935; CHECK-LABEL: memcpy_p3_p4_sz16_align_16_16:
2936; CHECK:       ; %bb.0: ; %entry
2937; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2938; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
2939; CHECK-NEXT:    s_waitcnt vmcnt(0)
2940; CHECK-NEXT:    ds_write_b128 v0, v[1:4]
2941; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2942; CHECK-NEXT:    s_setpc_b64 s[30:31]
2943entry:
2944  tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
2945  ret void
2946}
2947
; 31-byte llvm.memcpy from addrspace(4) %src into addrspace(3) %dst, both align 16.
; Expected gfx1030 code is two 16-byte global loads at offsets 0 and 15, each
; stored with ds_write_b128. The second access overlaps the first by one byte
; and ends at byte 30.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2948define void @memcpy_p3_p4_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
2949; CHECK-LABEL: memcpy_p3_p4_sz31_align_16_16:
2950; CHECK:       ; %bb.0: ; %entry
2951; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2952; CHECK-NEXT:    s_clause 0x1
2953; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2954; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:15
2955; CHECK-NEXT:    s_waitcnt vmcnt(1)
2956; CHECK-NEXT:    ds_write_b128 v0, v[3:6]
2957; CHECK-NEXT:    s_waitcnt vmcnt(0)
2958; CHECK-NEXT:    ds_write_b128 v0, v[7:10] offset:15
2959; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2960; CHECK-NEXT:    s_setpc_b64 s[30:31]
2961entry:
2962  tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
2963  ret void
2964}
2965
; 32-byte llvm.memcpy from addrspace(4) %src into addrspace(3) %dst, both align 16.
; Expected gfx1030 code is a clause of two global_load_dwordx4 (offsets 0 and 16),
; each stored with a ds_write_b128 at the same offset.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2966define void @memcpy_p3_p4_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
2967; CHECK-LABEL: memcpy_p3_p4_sz32_align_16_16:
2968; CHECK:       ; %bb.0: ; %entry
2969; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2970; CHECK-NEXT:    s_clause 0x1
2971; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2972; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
2973; CHECK-NEXT:    s_waitcnt vmcnt(1)
2974; CHECK-NEXT:    ds_write_b128 v0, v[3:6]
2975; CHECK-NEXT:    s_waitcnt vmcnt(0)
2976; CHECK-NEXT:    ds_write_b128 v0, v[7:10] offset:16
2977; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2978; CHECK-NEXT:    s_setpc_b64 s[30:31]
2979entry:
2980  tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
2981  ret void
2982}
2983
; 16-byte llvm.memcpy from addrspace(5) %src into addrspace(3) %dst, both align 1.
; Expected gfx1030 code is a clause of four buffer_load_dword (offen, offsets
; 0, 4, 8, 12) whose results are stored together with one ds_write2_b64.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
2984define void @memcpy_p3_p5_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
2985; CHECK-LABEL: memcpy_p3_p5_sz16_align_1_1:
2986; CHECK:       ; %bb.0: ; %entry
2987; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2988; CHECK-NEXT:    s_clause 0x3
2989; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
2990; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
2991; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
2992; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
2993; CHECK-NEXT:    s_waitcnt vmcnt(0)
2994; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
2995; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2996; CHECK-NEXT:    s_setpc_b64 s[30:31]
2997entry:
2998  tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
2999  ret void
3000}
3001
; 31-byte llvm.memcpy from addrspace(5) %src into addrspace(3) %dst, both align 1.
; Expected gfx1030 code is a clause of eight buffer_load_dword (offen) at offsets
; 0, 4, 8, 12, 16, 20, 23 and 27, stored as ds_write2_b64, ds_write_b64 offset 16
; and ds_write_b64 offset 23. The accesses at offsets 23/27 overlap the preceding
; ones by one byte and end at byte 30.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
3002define void @memcpy_p3_p5_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
3003; CHECK-LABEL: memcpy_p3_p5_sz31_align_1_1:
3004; CHECK:       ; %bb.0: ; %entry
3005; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3006; CHECK-NEXT:    s_clause 0x7
3007; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3008; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3009; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3010; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3011; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3012; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3013; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:23
3014; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:27
3015; CHECK-NEXT:    s_waitcnt vmcnt(4)
3016; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3017; CHECK-NEXT:    s_waitcnt vmcnt(2)
3018; CHECK-NEXT:    ds_write_b64 v0, v[6:7] offset:16
3019; CHECK-NEXT:    s_waitcnt vmcnt(0)
3020; CHECK-NEXT:    ds_write_b64 v0, v[8:9] offset:23
3021; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3022; CHECK-NEXT:    s_setpc_b64 s[30:31]
3023entry:
3024  tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
3025  ret void
3026}
3027
; 32-byte llvm.memcpy from addrspace(5) %src into addrspace(3) %dst, both align 1.
; Expected gfx1030 code is a clause of eight buffer_load_dword (offen, offsets
; 0 through 28 in steps of 4), stored as two ds_write2_b64 once the first four
; and then all eight loads have completed.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
3028define void @memcpy_p3_p5_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
3029; CHECK-LABEL: memcpy_p3_p5_sz32_align_1_1:
3030; CHECK:       ; %bb.0: ; %entry
3031; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3032; CHECK-NEXT:    s_clause 0x7
3033; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3034; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3035; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3036; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3037; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3038; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3039; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3040; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
3041; CHECK-NEXT:    s_waitcnt vmcnt(4)
3042; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3043; CHECK-NEXT:    s_waitcnt vmcnt(0)
3044; CHECK-NEXT:    ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
3045; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3046; CHECK-NEXT:    s_setpc_b64 s[30:31]
3047entry:
3048  tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
3049  ret void
3050}
3051
; 16-byte llvm.memcpy from addrspace(5) %src into addrspace(3) %dst, both align 2.
; Expected gfx1030 code is a clause of four buffer_load_dword (offen, offsets
; 0, 4, 8, 12) whose results are stored together with one ds_write2_b64.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
3052define void @memcpy_p3_p5_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
3053; CHECK-LABEL: memcpy_p3_p5_sz16_align_2_2:
3054; CHECK:       ; %bb.0: ; %entry
3055; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3056; CHECK-NEXT:    s_clause 0x3
3057; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3058; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3059; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3060; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3061; CHECK-NEXT:    s_waitcnt vmcnt(0)
3062; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3063; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3064; CHECK-NEXT:    s_setpc_b64 s[30:31]
3065entry:
3066  tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
3067  ret void
3068}
3069
; 31-byte llvm.memcpy from addrspace(5) %src into addrspace(3) %dst, both align 2.
; Expected gfx1030 code is a clause of eight buffer_load_dword (offen) at offsets
; 0, 4, 8, 12, 16, 20, 23 and 27, stored as ds_write2_b64, ds_write_b64 offset 16
; and ds_write_b64 offset 23. The accesses at offsets 23/27 overlap the preceding
; ones by one byte and end at byte 30.
; The FileCheck assertions are autogenerated; regenerate them rather than hand-editing.
3070define void @memcpy_p3_p5_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
3071; CHECK-LABEL: memcpy_p3_p5_sz31_align_2_2:
3072; CHECK:       ; %bb.0: ; %entry
3073; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3074; CHECK-NEXT:    s_clause 0x7
3075; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3076; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3077; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3078; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3079; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3080; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3081; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:23
3082; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:27
3083; CHECK-NEXT:    s_waitcnt vmcnt(4)
3084; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3085; CHECK-NEXT:    s_waitcnt vmcnt(2)
3086; CHECK-NEXT:    ds_write_b64 v0, v[6:7] offset:16
3087; CHECK-NEXT:    s_waitcnt vmcnt(0)
3088; CHECK-NEXT:    ds_write_b64 v0, v[8:9] offset:23
3089; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3090; CHECK-NEXT:    s_setpc_b64 s[30:31]
3091entry:
3092  tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
3093  ret void
3094}
3095
; Test: 32-byte llvm.memcpy from addrspace(5) to addrspace(3), both pointers
; declared align 2.
; Expected code: eight buffer_load_dword reads at offsets 0..28 (step 4) in one
; s_clause, then two ds_write2_b64 stores. The first store is issued after
; s_waitcnt vmcnt(4) and uses registers v[2:5]; the second is issued after
; vmcnt(0) and uses v[6:9] with offset0:2 offset1:3.
3096define void @memcpy_p3_p5_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
3097; CHECK-LABEL: memcpy_p3_p5_sz32_align_2_2:
3098; CHECK:       ; %bb.0: ; %entry
3099; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3100; CHECK-NEXT:    s_clause 0x7
3101; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3102; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3103; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3104; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3105; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3106; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3107; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3108; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
3109; CHECK-NEXT:    s_waitcnt vmcnt(4)
3110; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3111; CHECK-NEXT:    s_waitcnt vmcnt(0)
3112; CHECK-NEXT:    ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
3113; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3114; CHECK-NEXT:    s_setpc_b64 s[30:31]
3115entry:
3116  tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
3117  ret void
3118}
3119
; Test: 16-byte llvm.memcpy from addrspace(5) to addrspace(3), both pointers
; declared align 8.
; Expected code: four buffer_load_dword reads (offsets 0, 4, 8, 12) in one
; s_clause, a wait for all of them, and a single ds_write2_b64 store of
; v[2:5].
3120define void @memcpy_p3_p5_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
3121; CHECK-LABEL: memcpy_p3_p5_sz16_align_8_8:
3122; CHECK:       ; %bb.0: ; %entry
3123; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3124; CHECK-NEXT:    s_clause 0x3
3125; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3126; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3127; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3128; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3129; CHECK-NEXT:    s_waitcnt vmcnt(0)
3130; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3131; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3132; CHECK-NEXT:    s_setpc_b64 s[30:31]
3133entry:
3134  tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
3135  ret void
3136}
3137
; Test: 31-byte llvm.memcpy from addrspace(5) to addrspace(3), both pointers
; declared align 8.
; Expected code: the copy is split into two 16-byte pieces that overlap at
; byte 15. Dwords at offsets 0..12 are loaded into v[6:9] and stored with
; ds_write2_b64; dwords at offsets 15, 19, 23, 27 are loaded into v[2:5] and
; stored with a single ds_write_b128 at offset 15.
3138define void @memcpy_p3_p5_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
3139; CHECK-LABEL: memcpy_p3_p5_sz31_align_8_8:
3140; CHECK:       ; %bb.0: ; %entry
3141; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3142; CHECK-NEXT:    s_clause 0x7
3143; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen
3144; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
3145; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
3146; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:12
3147; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
3148; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
3149; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
3150; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
3151; CHECK-NEXT:    s_waitcnt vmcnt(4)
3152; CHECK-NEXT:    ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
3153; CHECK-NEXT:    s_waitcnt vmcnt(0)
3154; CHECK-NEXT:    ds_write_b128 v0, v[2:5] offset:15
3155; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3156; CHECK-NEXT:    s_setpc_b64 s[30:31]
3157entry:
3158  tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
3159  ret void
3160}
3161
; Test: 32-byte llvm.memcpy from addrspace(5) to addrspace(3), both pointers
; declared align 8.
; Expected code: eight buffer_load_dword reads at offsets 0..28 (step 4) in one
; s_clause, then two ds_write2_b64 stores: v[2:5] after s_waitcnt vmcnt(4) and
; v[6:9] (offset0:2 offset1:3) after vmcnt(0).
3162define void @memcpy_p3_p5_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
3163; CHECK-LABEL: memcpy_p3_p5_sz32_align_8_8:
3164; CHECK:       ; %bb.0: ; %entry
3165; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3166; CHECK-NEXT:    s_clause 0x7
3167; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3168; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3169; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3170; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3171; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3172; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3173; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3174; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
3175; CHECK-NEXT:    s_waitcnt vmcnt(4)
3176; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3177; CHECK-NEXT:    s_waitcnt vmcnt(0)
3178; CHECK-NEXT:    ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
3179; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3180; CHECK-NEXT:    s_setpc_b64 s[30:31]
3181entry:
3182  tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
3183  ret void
3184}
3185
; Test: 16-byte llvm.memcpy from addrspace(5) to addrspace(3), both pointers
; declared align 16.
; Expected code: four buffer_load_dword reads (offsets 0, 4, 8, 12) in one
; s_clause, then one ds_write_b128 of v[2:5]. Unlike the align-8 variant of
; this test, the store is a single 128-bit write rather than ds_write2_b64.
3186define void @memcpy_p3_p5_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
3187; CHECK-LABEL: memcpy_p3_p5_sz16_align_16_16:
3188; CHECK:       ; %bb.0: ; %entry
3189; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3190; CHECK-NEXT:    s_clause 0x3
3191; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3192; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3193; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3194; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3195; CHECK-NEXT:    s_waitcnt vmcnt(0)
3196; CHECK-NEXT:    ds_write_b128 v0, v[2:5]
3197; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3198; CHECK-NEXT:    s_setpc_b64 s[30:31]
3199entry:
3200  tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
3201  ret void
3202}
3203
; Test: 31-byte llvm.memcpy from addrspace(5) to addrspace(3), both pointers
; declared align 16.
; Expected code: eight buffer_load_dword reads; the second group of four
; starts at offset 15, so the two 16-byte halves overlap at byte 15. Each half
; is written with one ds_write_b128 (offset 0 after vmcnt(4), offset 15 after
; vmcnt(0)).
3204define void @memcpy_p3_p5_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
3205; CHECK-LABEL: memcpy_p3_p5_sz31_align_16_16:
3206; CHECK:       ; %bb.0: ; %entry
3207; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3208; CHECK-NEXT:    s_clause 0x7
3209; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3210; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3211; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3212; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3213; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:15
3214; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:19
3215; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:23
3216; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:27
3217; CHECK-NEXT:    s_waitcnt vmcnt(4)
3218; CHECK-NEXT:    ds_write_b128 v0, v[2:5]
3219; CHECK-NEXT:    s_waitcnt vmcnt(0)
3220; CHECK-NEXT:    ds_write_b128 v0, v[6:9] offset:15
3221; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3222; CHECK-NEXT:    s_setpc_b64 s[30:31]
3223entry:
3224  tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
3225  ret void
3226}
3227
; Test: 32-byte llvm.memcpy from addrspace(5) to addrspace(3), both pointers
; declared align 16.
; Expected code: eight buffer_load_dword reads at offsets 0..28 (step 4) in one
; s_clause, then two ds_write_b128 stores at offsets 0 and 16. The first store
; follows s_waitcnt vmcnt(4), the second follows vmcnt(0).
3228define void @memcpy_p3_p5_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
3229; CHECK-LABEL: memcpy_p3_p5_sz32_align_16_16:
3230; CHECK:       ; %bb.0: ; %entry
3231; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3232; CHECK-NEXT:    s_clause 0x7
3233; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3234; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3235; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3236; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3237; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3238; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3239; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3240; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
3241; CHECK-NEXT:    s_waitcnt vmcnt(4)
3242; CHECK-NEXT:    ds_write_b128 v0, v[2:5]
3243; CHECK-NEXT:    s_waitcnt vmcnt(0)
3244; CHECK-NEXT:    ds_write_b128 v0, v[6:9] offset:16
3245; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3246; CHECK-NEXT:    s_setpc_b64 s[30:31]
3247entry:
3248  tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
3249  ret void
3250}
3251
; Test: 16-byte llvm.memcpy from addrspace(0) to addrspace(5), both pointers
; declared align 1.
; Expected code: one flat_load_dwordx4 into v[1:4], a wait on both vmcnt and
; lgkmcnt, then four buffer_store_dword writes emitted from the highest offset
; (12) down to offset 0. No s_waitcnt is expected between the stores and the
; return.
3252define void @memcpy_p5_p0_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
3253; CHECK-LABEL: memcpy_p5_p0_sz16_align_1_1:
3254; CHECK:       ; %bb.0: ; %entry
3255; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3256; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
3257; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3258; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3259; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3260; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3261; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3262; CHECK-NEXT:    s_setpc_b64 s[30:31]
3263entry:
3264  tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
3265  ret void
3266}
3267
; Test: 31-byte llvm.memcpy from addrspace(0) to addrspace(5), both pointers
; declared align 1.
; Expected code: four flat loads in one s_clause that together cover bytes
; 0..30 without overlap: ubyte at 30, ushort at 28, dwordx3 at 16 and dwordx4
; at 0. The stores mirror that split (byte, short, three dwords, four dwords),
; and each group is preceded by an s_waitcnt whose vmcnt/lgkmcnt values step
; down 3, 2, 1, 0.
3268define void @memcpy_p5_p0_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
3269; CHECK-LABEL: memcpy_p5_p0_sz31_align_1_1:
3270; CHECK:       ; %bb.0: ; %entry
3271; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3272; CHECK-NEXT:    s_clause 0x3
3273; CHECK-NEXT:    flat_load_ubyte v8, v[1:2] offset:30
3274; CHECK-NEXT:    flat_load_ushort v9, v[1:2] offset:28
3275; CHECK-NEXT:    flat_load_dwordx3 v[5:7], v[1:2] offset:16
3276; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
3277; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
3278; CHECK-NEXT:    buffer_store_byte v8, v0, s[0:3], 0 offen offset:30
3279; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
3280; CHECK-NEXT:    buffer_store_short v9, v0, s[0:3], 0 offen offset:28
3281; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3282; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
3283; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
3284; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
3285; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3286; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3287; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3288; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3289; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3290; CHECK-NEXT:    s_setpc_b64 s[30:31]
3291entry:
3292  tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
3293  ret void
3294}
3295
; Test: 32-byte llvm.memcpy from addrspace(0) to addrspace(5), both pointers
; declared align 1.
; Expected code: two flat_load_dwordx4 reads in one s_clause, the upper half
; (offset 16) first. Its four dwords are stored after the first wait
; (offsets 28..16), and the lower half is stored after the second wait
; (offsets 12..0).
3296define void @memcpy_p5_p0_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
3297; CHECK-LABEL: memcpy_p5_p0_sz32_align_1_1:
3298; CHECK:       ; %bb.0: ; %entry
3299; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3300; CHECK-NEXT:    s_clause 0x1
3301; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
3302; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
3303; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3304; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
3305; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
3306; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
3307; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
3308; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3309; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3310; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3311; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3312; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen
3313; CHECK-NEXT:    s_setpc_b64 s[30:31]
3314entry:
3315  tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
3316  ret void
3317}
3318
; Test: 16-byte llvm.memcpy from addrspace(0) to addrspace(5), both pointers
; declared align 2.
; Expected code: one flat_load_dwordx4 into v[1:4], a combined vmcnt/lgkmcnt
; wait, then four buffer_store_dword writes at offsets 12, 8, 4, 0. This is
; the same instruction sequence the align_1_1 variant of this test expects.
3319define void @memcpy_p5_p0_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
3320; CHECK-LABEL: memcpy_p5_p0_sz16_align_2_2:
3321; CHECK:       ; %bb.0: ; %entry
3322; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3323; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
3324; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3325; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3326; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3327; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3328; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3329; CHECK-NEXT:    s_setpc_b64 s[30:31]
3330entry:
3331  tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
3332  ret void
3333}
3334
; Test: 31-byte llvm.memcpy from addrspace(0) to addrspace(5), both pointers
; declared align 2.
; Expected code: four non-overlapping flat loads in one s_clause (ubyte at 30,
; ushort at 28, dwordx3 at 16, dwordx4 at 0), then stores of matching widths.
; Each group of stores is preceded by an s_waitcnt whose vmcnt/lgkmcnt values
; step down 3, 2, 1, 0.
3335define void @memcpy_p5_p0_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
3336; CHECK-LABEL: memcpy_p5_p0_sz31_align_2_2:
3337; CHECK:       ; %bb.0: ; %entry
3338; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3339; CHECK-NEXT:    s_clause 0x3
3340; CHECK-NEXT:    flat_load_ubyte v8, v[1:2] offset:30
3341; CHECK-NEXT:    flat_load_ushort v9, v[1:2] offset:28
3342; CHECK-NEXT:    flat_load_dwordx3 v[5:7], v[1:2] offset:16
3343; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
3344; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
3345; CHECK-NEXT:    buffer_store_byte v8, v0, s[0:3], 0 offen offset:30
3346; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
3347; CHECK-NEXT:    buffer_store_short v9, v0, s[0:3], 0 offen offset:28
3348; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3349; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
3350; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
3351; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
3352; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3353; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3354; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3355; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3356; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3357; CHECK-NEXT:    s_setpc_b64 s[30:31]
3358entry:
3359  tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
3360  ret void
3361}
3362
; Test: 32-byte llvm.memcpy from addrspace(0) to addrspace(5), both pointers
; declared align 2.
; Expected code: two flat_load_dwordx4 reads (offset 16 first, then offset 0)
; in one s_clause, followed by eight buffer_store_dword writes. The upper
; 16 bytes are stored after the first wait and the lower 16 bytes after the
; second.
3363define void @memcpy_p5_p0_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
3364; CHECK-LABEL: memcpy_p5_p0_sz32_align_2_2:
3365; CHECK:       ; %bb.0: ; %entry
3366; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3367; CHECK-NEXT:    s_clause 0x1
3368; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
3369; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
3370; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3371; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
3372; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
3373; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
3374; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
3375; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3376; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3377; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3378; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3379; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen
3380; CHECK-NEXT:    s_setpc_b64 s[30:31]
3381entry:
3382  tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
3383  ret void
3384}
3385
; Test: 16-byte llvm.memcpy from addrspace(0) to addrspace(5), both pointers
; declared align 8.
; Expected code: one flat_load_dwordx4 into v[1:4], a combined vmcnt/lgkmcnt
; wait, then four buffer_store_dword writes at offsets 12, 8, 4, 0.
3386define void @memcpy_p5_p0_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
3387; CHECK-LABEL: memcpy_p5_p0_sz16_align_8_8:
3388; CHECK:       ; %bb.0: ; %entry
3389; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3390; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
3391; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3392; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3393; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3394; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3395; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3396; CHECK-NEXT:    s_setpc_b64 s[30:31]
3397entry:
3398  tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
3399  ret void
3400}
3401
; Test: 31-byte llvm.memcpy from addrspace(0) to addrspace(5), both pointers
; declared align 8.
; Expected code: two flat_load_dwordx4 reads, one at offset 15 and one at
; offset 0, so the two 16-byte pieces overlap at byte 15 (in contrast to the
; byte/short/dwordx3 tail expected by the align_1_1 and align_2_2 variants).
; The piece loaded from offset 15 is stored first as dwords at offsets 27, 23,
; 19, 15; the piece from offset 0 follows at offsets 12, 8, 4, 0.
3402define void @memcpy_p5_p0_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
3403; CHECK-LABEL: memcpy_p5_p0_sz31_align_8_8:
3404; CHECK:       ; %bb.0: ; %entry
3405; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3406; CHECK-NEXT:    s_clause 0x1
3407; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:15
3408; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
3409; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3410; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:27
3411; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:23
3412; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:19
3413; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:15
3414; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3415; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3416; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3417; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3418; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen
3419; CHECK-NEXT:    s_setpc_b64 s[30:31]
3420entry:
3421  tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
3422  ret void
3423}
3424
; Test: 32-byte llvm.memcpy from addrspace(0) to addrspace(5), both pointers
; declared align 8.
; Expected code: two flat_load_dwordx4 reads (offset 16 first, then offset 0)
; in one s_clause, then eight buffer_store_dword writes: offsets 28..16 after
; the first wait, offsets 12..0 after the second.
3425define void @memcpy_p5_p0_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
3426; CHECK-LABEL: memcpy_p5_p0_sz32_align_8_8:
3427; CHECK:       ; %bb.0: ; %entry
3428; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3429; CHECK-NEXT:    s_clause 0x1
3430; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
3431; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
3432; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3433; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
3434; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
3435; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
3436; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
3437; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3438; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3439; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3440; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3441; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen
3442; CHECK-NEXT:    s_setpc_b64 s[30:31]
3443entry:
3444  tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
3445  ret void
3446}
3447
; Test: 16-byte llvm.memcpy from addrspace(0) to addrspace(5), both pointers
; declared align 16.
; Expected code: one flat_load_dwordx4 into v[1:4], a combined vmcnt/lgkmcnt
; wait, then four buffer_store_dword writes at offsets 12, 8, 4, 0. The store
; side stays dword-sized even though the destination is 16-byte aligned.
3448define void @memcpy_p5_p0_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
3449; CHECK-LABEL: memcpy_p5_p0_sz16_align_16_16:
3450; CHECK:       ; %bb.0: ; %entry
3451; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3452; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
3453; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3454; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3455; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3456; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3457; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3458; CHECK-NEXT:    s_setpc_b64 s[30:31]
3459entry:
3460  tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
3461  ret void
3462}
3463
; Test: 31-byte llvm.memcpy from addrspace(0) to addrspace(5), both pointers
; declared align 16.
; Expected code: two flat_load_dwordx4 reads at offsets 15 and 0, i.e. two
; 16-byte pieces overlapping at byte 15. The piece from offset 15 is stored as
; dwords at offsets 27, 23, 19, 15 after the first wait; the piece from offset
; 0 is stored at offsets 12, 8, 4, 0 after the second.
3464define void @memcpy_p5_p0_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
3465; CHECK-LABEL: memcpy_p5_p0_sz31_align_16_16:
3466; CHECK:       ; %bb.0: ; %entry
3467; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3468; CHECK-NEXT:    s_clause 0x1
3469; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:15
3470; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
3471; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3472; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:27
3473; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:23
3474; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:19
3475; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:15
3476; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3477; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3478; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3479; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3480; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen
3481; CHECK-NEXT:    s_setpc_b64 s[30:31]
3482entry:
3483  tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
3484  ret void
3485}
3486
; Test: 32-byte llvm.memcpy from addrspace(0) to addrspace(5), both pointers
; declared align 16.
; Expected code: two flat_load_dwordx4 reads (offset 16 first, then offset 0)
; in one s_clause, then eight buffer_store_dword writes: offsets 28..16 after
; the first wait, offsets 12..0 after the second.
3487define void @memcpy_p5_p0_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
3488; CHECK-LABEL: memcpy_p5_p0_sz32_align_16_16:
3489; CHECK:       ; %bb.0: ; %entry
3490; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3491; CHECK-NEXT:    s_clause 0x1
3492; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
3493; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
3494; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3495; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
3496; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
3497; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
3498; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
3499; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3500; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3501; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3502; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3503; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen
3504; CHECK-NEXT:    s_setpc_b64 s[30:31]
3505entry:
3506  tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
3507  ret void
3508}
3509
; Test: 16-byte llvm.memcpy from addrspace(1) to addrspace(5), both pointers
; declared align 1.
; Expected code: one global_load_dwordx4 into v[1:4], a vmcnt-only wait, then
; four buffer_store_dword writes. The pinned store order is offsets 4, 0, 12,
; 8 (pairwise swapped), which differs from the descending 12, 8, 4, 0 order
; expected by the align_8_8 variant of this test.
3510define void @memcpy_p5_p1_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
3511; CHECK-LABEL: memcpy_p5_p1_sz16_align_1_1:
3512; CHECK:       ; %bb.0: ; %entry
3513; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3514; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3515; CHECK-NEXT:    s_waitcnt vmcnt(0)
3516; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3517; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3518; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3519; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3520; CHECK-NEXT:    s_setpc_b64 s[30:31]
3521entry:
3522  tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
3523  ret void
3524}
3525
; Test: 31-byte llvm.memcpy from addrspace(1) to addrspace(5), both pointers
; declared align 1.
; Expected code: four non-overlapping global loads in one s_clause (dwordx3 at
; 16, ushort at 28, ubyte at 30, dwordx4 at 0). Stores are interleaved with
; vmcnt waits: v7 (offset 24) after vmcnt(3), the short after vmcnt(2), the
; byte after vmcnt(1), and the remaining six dwords after vmcnt(0). Note that
; v5 and v6, which come from the first load, are among the last stores.
3526define void @memcpy_p5_p1_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
3527; CHECK-LABEL: memcpy_p5_p1_sz31_align_1_1:
3528; CHECK:       ; %bb.0: ; %entry
3529; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3530; CHECK-NEXT:    s_clause 0x3
3531; CHECK-NEXT:    global_load_dwordx3 v[5:7], v[1:2], off offset:16
3532; CHECK-NEXT:    global_load_ushort v8, v[1:2], off offset:28
3533; CHECK-NEXT:    global_load_ubyte v9, v[1:2], off offset:30
3534; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3535; CHECK-NEXT:    s_waitcnt vmcnt(3)
3536; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
3537; CHECK-NEXT:    s_waitcnt vmcnt(2)
3538; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
3539; CHECK-NEXT:    s_waitcnt vmcnt(1)
3540; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
3541; CHECK-NEXT:    s_waitcnt vmcnt(0)
3542; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3543; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3544; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3545; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3546; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
3547; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
3548; CHECK-NEXT:    s_setpc_b64 s[30:31]
3549entry:
3550  tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
3551  ret void
3552}
3553
; Test: 32-byte llvm.memcpy from addrspace(1) to addrspace(5), both pointers
; declared align 1.
; Expected code: two global_load_dwordx4 reads in one s_clause, lower half
; (offset 0) first. The lower half is stored after vmcnt(1) at offsets 4, 0,
; 12, 8; the upper half is stored after vmcnt(0) at offsets 20, 16, 28, 24.
3554define void @memcpy_p5_p1_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
3555; CHECK-LABEL: memcpy_p5_p1_sz32_align_1_1:
3556; CHECK:       ; %bb.0: ; %entry
3557; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3558; CHECK-NEXT:    s_clause 0x1
3559; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3560; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
3561; CHECK-NEXT:    s_waitcnt vmcnt(1)
3562; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3563; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
3564; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3565; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3566; CHECK-NEXT:    s_waitcnt vmcnt(0)
3567; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
3568; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
3569; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
3570; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
3571; CHECK-NEXT:    s_setpc_b64 s[30:31]
3572entry:
3573  tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
3574  ret void
3575}
3576
; Test: 16-byte llvm.memcpy from addrspace(1) to addrspace(5), both pointers
; declared align 2.
; Expected code: one global_load_dwordx4 into v[1:4], a vmcnt-only wait, then
; four buffer_store_dword writes in the order offsets 4, 0, 12, 8 - the same
; sequence the align_1_1 variant of this test expects.
3577define void @memcpy_p5_p1_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
3578; CHECK-LABEL: memcpy_p5_p1_sz16_align_2_2:
3579; CHECK:       ; %bb.0: ; %entry
3580; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3581; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3582; CHECK-NEXT:    s_waitcnt vmcnt(0)
3583; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3584; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3585; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3586; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3587; CHECK-NEXT:    s_setpc_b64 s[30:31]
3588entry:
3589  tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
3590  ret void
3591}
3592
; Test: 31-byte llvm.memcpy from addrspace(1) to addrspace(5), both pointers
; declared align 2.
; Expected code: four non-overlapping global loads in one s_clause (dwordx3 at
; 16, ushort at 28, ubyte at 30, dwordx4 at 0). v7 (offset 24), the short and
; the byte are stored after vmcnt(3), vmcnt(2) and vmcnt(1) respectively; the
; remaining six dwords are stored after vmcnt(0).
3593define void @memcpy_p5_p1_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
3594; CHECK-LABEL: memcpy_p5_p1_sz31_align_2_2:
3595; CHECK:       ; %bb.0: ; %entry
3596; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3597; CHECK-NEXT:    s_clause 0x3
3598; CHECK-NEXT:    global_load_dwordx3 v[5:7], v[1:2], off offset:16
3599; CHECK-NEXT:    global_load_ushort v8, v[1:2], off offset:28
3600; CHECK-NEXT:    global_load_ubyte v9, v[1:2], off offset:30
3601; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3602; CHECK-NEXT:    s_waitcnt vmcnt(3)
3603; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
3604; CHECK-NEXT:    s_waitcnt vmcnt(2)
3605; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
3606; CHECK-NEXT:    s_waitcnt vmcnt(1)
3607; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
3608; CHECK-NEXT:    s_waitcnt vmcnt(0)
3609; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3610; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3611; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3612; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3613; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
3614; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
3615; CHECK-NEXT:    s_setpc_b64 s[30:31]
3616entry:
3617  tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
3618  ret void
3619}
3620
; Test: 32-byte llvm.memcpy from addrspace(1) to addrspace(5), both pointers
; declared align 2.
; Expected code: two global_load_dwordx4 reads in one s_clause, lower half
; (offset 0) first. The lower half is stored after vmcnt(1) at offsets 4, 0,
; 12, 8; the upper half is stored after vmcnt(0) at offsets 20, 16, 28, 24.
3621define void @memcpy_p5_p1_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
3622; CHECK-LABEL: memcpy_p5_p1_sz32_align_2_2:
3623; CHECK:       ; %bb.0: ; %entry
3624; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3625; CHECK-NEXT:    s_clause 0x1
3626; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3627; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
3628; CHECK-NEXT:    s_waitcnt vmcnt(1)
3629; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3630; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
3631; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3632; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3633; CHECK-NEXT:    s_waitcnt vmcnt(0)
3634; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
3635; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
3636; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
3637; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
3638; CHECK-NEXT:    s_setpc_b64 s[30:31]
3639entry:
3640  tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
3641  ret void
3642}
3643
; Test: 16-byte llvm.memcpy from addrspace(1) to addrspace(5), both pointers
; declared align 8.
; Expected code: one global_load_dwordx4 into v[1:4], a vmcnt-only wait, then
; four buffer_store_dword writes in descending offset order (12, 8, 4, 0).
3644define void @memcpy_p5_p1_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
3645; CHECK-LABEL: memcpy_p5_p1_sz16_align_8_8:
3646; CHECK:       ; %bb.0: ; %entry
3647; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3648; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3649; CHECK-NEXT:    s_waitcnt vmcnt(0)
3650; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3651; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3652; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3653; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3654; CHECK-NEXT:    s_setpc_b64 s[30:31]
3655entry:
3656  tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
3657  ret void
3658}
3659
; Copies 31 bytes from addrspace(1) %src to addrspace(5) %dst, both align 8.
; Expected gfx1030 output: the odd size is covered by a second
; global_load_dwordx4 at offset 15 (overlapping the first load at byte 15),
; with the matching buffer_store_dword group at offsets 15, 19, 23 and 27.
3660define void @memcpy_p5_p1_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
3661; CHECK-LABEL: memcpy_p5_p1_sz31_align_8_8:
3662; CHECK:       ; %bb.0: ; %entry
3663; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3664; CHECK-NEXT:    s_clause 0x1
3665; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3666; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:15
3667; CHECK-NEXT:    s_waitcnt vmcnt(1)
3668; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3669; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3670; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3671; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
3672; CHECK-NEXT:    s_waitcnt vmcnt(0)
3673; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
3674; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
3675; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
3676; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
3677; CHECK-NEXT:    s_setpc_b64 s[30:31]
3678entry:
3679  tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
3680  ret void
3681}
3682
; Copies 32 bytes from addrspace(1) %src to addrspace(5) %dst, both align 8.
; Expected gfx1030 output: two global_load_dwordx4 (offsets 0 and 16), then
; eight buffer_store_dword at byte offsets 0 through 28.
3683define void @memcpy_p5_p1_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
3684; CHECK-LABEL: memcpy_p5_p1_sz32_align_8_8:
3685; CHECK:       ; %bb.0: ; %entry
3686; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3687; CHECK-NEXT:    s_clause 0x1
3688; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3689; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
3690; CHECK-NEXT:    s_waitcnt vmcnt(1)
3691; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3692; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3693; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3694; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
3695; CHECK-NEXT:    s_waitcnt vmcnt(0)
3696; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
3697; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
3698; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
3699; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
3700; CHECK-NEXT:    s_setpc_b64 s[30:31]
3701entry:
3702  tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
3703  ret void
3704}
3705
; Copies 16 bytes from addrspace(1) %src to addrspace(5) %dst, both align 16.
; Expected gfx1030 output: one global_load_dwordx4, then four
; buffer_store_dword at byte offsets 12, 8, 4 and 0.
3706define void @memcpy_p5_p1_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
3707; CHECK-LABEL: memcpy_p5_p1_sz16_align_16_16:
3708; CHECK:       ; %bb.0: ; %entry
3709; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3710; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3711; CHECK-NEXT:    s_waitcnt vmcnt(0)
3712; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3713; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3714; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3715; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3716; CHECK-NEXT:    s_setpc_b64 s[30:31]
3717entry:
3718  tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
3719  ret void
3720}
3721
; Copies 31 bytes from addrspace(1) %src to addrspace(5) %dst, both align 16.
; Expected gfx1030 output: the odd size is covered by a second
; global_load_dwordx4 at offset 15 (overlapping the first load at byte 15),
; with the matching buffer_store_dword group at offsets 15, 19, 23 and 27.
3722define void @memcpy_p5_p1_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
3723; CHECK-LABEL: memcpy_p5_p1_sz31_align_16_16:
3724; CHECK:       ; %bb.0: ; %entry
3725; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3726; CHECK-NEXT:    s_clause 0x1
3727; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3728; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:15
3729; CHECK-NEXT:    s_waitcnt vmcnt(1)
3730; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3731; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3732; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3733; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
3734; CHECK-NEXT:    s_waitcnt vmcnt(0)
3735; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
3736; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
3737; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
3738; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
3739; CHECK-NEXT:    s_setpc_b64 s[30:31]
3740entry:
3741  tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
3742  ret void
3743}
3744
; Copies 32 bytes from addrspace(1) %src to addrspace(5) %dst, both align 16.
; Expected gfx1030 output: two global_load_dwordx4 (offsets 0 and 16), then
; eight buffer_store_dword at byte offsets 0 through 28.
3745define void @memcpy_p5_p1_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
3746; CHECK-LABEL: memcpy_p5_p1_sz32_align_16_16:
3747; CHECK:       ; %bb.0: ; %entry
3748; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3749; CHECK-NEXT:    s_clause 0x1
3750; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3751; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
3752; CHECK-NEXT:    s_waitcnt vmcnt(1)
3753; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3754; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3755; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3756; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
3757; CHECK-NEXT:    s_waitcnt vmcnt(0)
3758; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
3759; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
3760; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
3761; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
3762; CHECK-NEXT:    s_setpc_b64 s[30:31]
3763entry:
3764  tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
3765  ret void
3766}
3767
; Copies 16 bytes from addrspace(3) %src to addrspace(5) %dst, both align 1.
; Expected gfx1030 output: one ds_read2_b64, then four buffer_store_dword
; at byte offsets 4, 0, 12 and 8.
3768define void @memcpy_p5_p3_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
3769; CHECK-LABEL: memcpy_p5_p3_sz16_align_1_1:
3770; CHECK:       ; %bb.0: ; %entry
3771; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3772; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
3773; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3774; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3775; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3776; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3777; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3778; CHECK-NEXT:    s_setpc_b64 s[30:31]
3779entry:
3780  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
3781  ret void
3782}
3783
; Copies 31 bytes from addrspace(3) %src to addrspace(5) %dst, both align 1.
; Expected gfx1030 output: the tail is split into non-overlapping pieces, a
; dword at offset 24, a 16-bit value at offset 28 and a byte at offset 30,
; alongside a ds_read2_b64 and a ds_read_b64 (offset 16) for the leading bytes.
3784define void @memcpy_p5_p3_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
3785; CHECK-LABEL: memcpy_p5_p3_sz31_align_1_1:
3786; CHECK:       ; %bb.0: ; %entry
3787; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3788; CHECK-NEXT:    ds_read_b32 v8, v1 offset:24
3789; CHECK-NEXT:    ds_read_u16 v9, v1 offset:28
3790; CHECK-NEXT:    ds_read_u8 v10, v1 offset:30
3791; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset1:1
3792; CHECK-NEXT:    ds_read_b64 v[6:7], v1 offset:16
3793; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
3794; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
3795; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
3796; CHECK-NEXT:    buffer_store_short v9, v0, s[0:3], 0 offen offset:28
3797; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
3798; CHECK-NEXT:    buffer_store_byte v10, v0, s[0:3], 0 offen offset:30
3799; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
3800; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3801; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
3802; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3803; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3804; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3805; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
3806; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
3807; CHECK-NEXT:    s_setpc_b64 s[30:31]
3808entry:
3809  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
3810  ret void
3811}
3812
; Copies 32 bytes from addrspace(3) %src to addrspace(5) %dst, both align 1.
; Expected gfx1030 output: two ds_read2_b64 (offset1:1 and offset0:2
; offset1:3), then eight buffer_store_dword at byte offsets 0 through 28.
3813define void @memcpy_p5_p3_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
3814; CHECK-LABEL: memcpy_p5_p3_sz32_align_1_1:
3815; CHECK:       ; %bb.0: ; %entry
3816; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3817; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset1:1
3818; CHECK-NEXT:    ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
3819; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
3820; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3821; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
3822; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3823; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3824; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3825; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
3826; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
3827; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
3828; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
3829; CHECK-NEXT:    s_setpc_b64 s[30:31]
3830entry:
3831  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
3832  ret void
3833}
3834
; Copies 16 bytes from addrspace(3) %src to addrspace(5) %dst, both align 2.
; Expected gfx1030 output: one ds_read2_b64, then four buffer_store_dword
; at byte offsets 4, 0, 12 and 8.
3835define void @memcpy_p5_p3_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
3836; CHECK-LABEL: memcpy_p5_p3_sz16_align_2_2:
3837; CHECK:       ; %bb.0: ; %entry
3838; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3839; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
3840; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3841; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3842; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3843; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3844; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3845; CHECK-NEXT:    s_setpc_b64 s[30:31]
3846entry:
3847  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
3848  ret void
3849}
3850
; Copies 31 bytes from addrspace(3) %src to addrspace(5) %dst, both align 2.
; Expected gfx1030 output: the tail is split into non-overlapping pieces, a
; dword at offset 24, a 16-bit value at offset 28 and a byte at offset 30,
; alongside a ds_read2_b64 and a ds_read_b64 (offset 16) for the leading bytes.
3851define void @memcpy_p5_p3_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
3852; CHECK-LABEL: memcpy_p5_p3_sz31_align_2_2:
3853; CHECK:       ; %bb.0: ; %entry
3854; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3855; CHECK-NEXT:    ds_read_b32 v8, v1 offset:24
3856; CHECK-NEXT:    ds_read_u16 v9, v1 offset:28
3857; CHECK-NEXT:    ds_read_u8 v10, v1 offset:30
3858; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset1:1
3859; CHECK-NEXT:    ds_read_b64 v[6:7], v1 offset:16
3860; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
3861; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
3862; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
3863; CHECK-NEXT:    buffer_store_short v9, v0, s[0:3], 0 offen offset:28
3864; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
3865; CHECK-NEXT:    buffer_store_byte v10, v0, s[0:3], 0 offen offset:30
3866; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
3867; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3868; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
3869; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3870; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3871; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3872; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
3873; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
3874; CHECK-NEXT:    s_setpc_b64 s[30:31]
3875entry:
3876  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
3877  ret void
3878}
3879
; Copies 32 bytes from addrspace(3) %src to addrspace(5) %dst, both align 2.
; Expected gfx1030 output: two ds_read2_b64 (offset1:1 and offset0:2
; offset1:3), then eight buffer_store_dword at byte offsets 0 through 28.
3880define void @memcpy_p5_p3_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
3881; CHECK-LABEL: memcpy_p5_p3_sz32_align_2_2:
3882; CHECK:       ; %bb.0: ; %entry
3883; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3884; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset1:1
3885; CHECK-NEXT:    ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
3886; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
3887; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3888; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
3889; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3890; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3891; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3892; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
3893; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
3894; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
3895; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
3896; CHECK-NEXT:    s_setpc_b64 s[30:31]
3897entry:
3898  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
3899  ret void
3900}
3901
; Copies 16 bytes from addrspace(3) %src to addrspace(5) %dst, both align 8.
; Expected gfx1030 output: one ds_read2_b64, then four buffer_store_dword
; at byte offsets 12, 8, 4 and 0.
3902define void @memcpy_p5_p3_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
3903; CHECK-LABEL: memcpy_p5_p3_sz16_align_8_8:
3904; CHECK:       ; %bb.0: ; %entry
3905; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3906; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
3907; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3908; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3909; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3910; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3911; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3912; CHECK-NEXT:    s_setpc_b64 s[30:31]
3913entry:
3914  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
3915  ret void
3916}
3917
; Copies 31 bytes from addrspace(3) %src to addrspace(5) %dst, both align 8.
; Expected gfx1030 output: a ds_read2_b64 for the leading bytes plus a
; ds_read_b128 at offset 15 for the tail, with the matching
; buffer_store_dword group at offsets 15, 19, 23 and 27.
3918define void @memcpy_p5_p3_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
3919; CHECK-LABEL: memcpy_p5_p3_sz31_align_8_8:
3920; CHECK:       ; %bb.0: ; %entry
3921; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3922; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset1:1
3923; CHECK-NEXT:    ds_read_b128 v[6:9], v1 offset:15
3924; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
3925; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3926; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3927; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3928; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
3929; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3930; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:27
3931; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:23
3932; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:19
3933; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:15
3934; CHECK-NEXT:    s_setpc_b64 s[30:31]
3935entry:
3936  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
3937  ret void
3938}
3939
; Copies 32 bytes from addrspace(3) %src to addrspace(5) %dst, both align 8.
; Expected gfx1030 output: two ds_read2_b64 (offset1:1 and offset0:2
; offset1:3), then eight buffer_store_dword at byte offsets 0 through 28.
3940define void @memcpy_p5_p3_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
3941; CHECK-LABEL: memcpy_p5_p3_sz32_align_8_8:
3942; CHECK:       ; %bb.0: ; %entry
3943; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3944; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset1:1
3945; CHECK-NEXT:    ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
3946; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
3947; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3948; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3949; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3950; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
3951; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3952; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
3953; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
3954; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
3955; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
3956; CHECK-NEXT:    s_setpc_b64 s[30:31]
3957entry:
3958  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
3959  ret void
3960}
3961
; Copies 16 bytes from addrspace(3) %src to addrspace(5) %dst, both align 16.
; Expected gfx1030 output: one ds_read_b128, then four buffer_store_dword
; at byte offsets 12, 8, 4 and 0.
3962define void @memcpy_p5_p3_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
3963; CHECK-LABEL: memcpy_p5_p3_sz16_align_16_16:
3964; CHECK:       ; %bb.0: ; %entry
3965; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3966; CHECK-NEXT:    ds_read_b128 v[1:4], v1
3967; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3968; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3969; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3970; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3971; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3972; CHECK-NEXT:    s_setpc_b64 s[30:31]
3973entry:
3974  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
3975  ret void
3976}
3977
; Copies 31 bytes from addrspace(3) %src to addrspace(5) %dst, both align 16.
; Expected gfx1030 output: two ds_read_b128, the second at offset 15 so it
; overlaps the first at byte 15, with the matching buffer_store_dword group
; at offsets 15, 19, 23 and 27.
3978define void @memcpy_p5_p3_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
3979; CHECK-LABEL: memcpy_p5_p3_sz31_align_16_16:
3980; CHECK:       ; %bb.0: ; %entry
3981; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3982; CHECK-NEXT:    ds_read_b128 v[2:5], v1
3983; CHECK-NEXT:    ds_read_b128 v[6:9], v1 offset:15
3984; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
3985; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
3986; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
3987; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
3988; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
3989; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3990; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:27
3991; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:23
3992; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:19
3993; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:15
3994; CHECK-NEXT:    s_setpc_b64 s[30:31]
3995entry:
3996  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
3997  ret void
3998}
3999
; Copies 32 bytes from addrspace(3) %src to addrspace(5) %dst, both align 16.
; Expected gfx1030 output: two ds_read_b128 (offsets 0 and 16), then eight
; buffer_store_dword at byte offsets 0 through 28.
4000define void @memcpy_p5_p3_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
4001; CHECK-LABEL: memcpy_p5_p3_sz32_align_16_16:
4002; CHECK:       ; %bb.0: ; %entry
4003; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4004; CHECK-NEXT:    ds_read_b128 v[2:5], v1
4005; CHECK-NEXT:    ds_read_b128 v[6:9], v1 offset:16
4006; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
4007; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
4008; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4009; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4010; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
4011; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
4012; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
4013; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
4014; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
4015; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
4016; CHECK-NEXT:    s_setpc_b64 s[30:31]
4017entry:
4018  tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
4019  ret void
4020}
4021
; Copies 16 bytes from addrspace(4) %src to addrspace(5) %dst, both align 1.
; Expected gfx1030 output: one global_load_dwordx4, then four
; buffer_store_dword at byte offsets 4, 0, 12 and 8.
4022define void @memcpy_p5_p4_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
4023; CHECK-LABEL: memcpy_p5_p4_sz16_align_1_1:
4024; CHECK:       ; %bb.0: ; %entry
4025; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4026; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4027; CHECK-NEXT:    s_waitcnt vmcnt(0)
4028; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4029; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4030; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4031; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4032; CHECK-NEXT:    s_setpc_b64 s[30:31]
4033entry:
4034  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
4035  ret void
4036}
4037
; Copies 31 bytes from addrspace(4) %src to addrspace(5) %dst, both align 1.
; Expected gfx1030 output: four loads with no overlap, a dwordx4 at offset 0,
; a dwordx3 at offset 16, a ushort at offset 28 and a ubyte at offset 30,
; followed by dword/short/byte buffer stores at the matching offsets.
4038define void @memcpy_p5_p4_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
4039; CHECK-LABEL: memcpy_p5_p4_sz31_align_1_1:
4040; CHECK:       ; %bb.0: ; %entry
4041; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4042; CHECK-NEXT:    s_clause 0x3
4043; CHECK-NEXT:    global_load_dwordx3 v[5:7], v[1:2], off offset:16
4044; CHECK-NEXT:    global_load_ushort v8, v[1:2], off offset:28
4045; CHECK-NEXT:    global_load_ubyte v9, v[1:2], off offset:30
4046; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4047; CHECK-NEXT:    s_waitcnt vmcnt(3)
4048; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
4049; CHECK-NEXT:    s_waitcnt vmcnt(2)
4050; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
4051; CHECK-NEXT:    s_waitcnt vmcnt(1)
4052; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
4053; CHECK-NEXT:    s_waitcnt vmcnt(0)
4054; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4055; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4056; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4057; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4058; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
4059; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
4060; CHECK-NEXT:    s_setpc_b64 s[30:31]
4061entry:
4062  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
4063  ret void
4064}
4065
; Copies 32 bytes from addrspace(4) %src to addrspace(5) %dst, both align 1.
; Expected gfx1030 output: two global_load_dwordx4 (offsets 0 and 16), then
; eight buffer_store_dword at byte offsets 0 through 28.
4066define void @memcpy_p5_p4_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
4067; CHECK-LABEL: memcpy_p5_p4_sz32_align_1_1:
4068; CHECK:       ; %bb.0: ; %entry
4069; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4070; CHECK-NEXT:    s_clause 0x1
4071; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
4072; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
4073; CHECK-NEXT:    s_waitcnt vmcnt(1)
4074; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4075; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
4076; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4077; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4078; CHECK-NEXT:    s_waitcnt vmcnt(0)
4079; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4080; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4081; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4082; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4083; CHECK-NEXT:    s_setpc_b64 s[30:31]
4084entry:
4085  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
4086  ret void
4087}
4088
; Copies 16 bytes from addrspace(4) %src to addrspace(5) %dst, both align 2.
; Expected gfx1030 output: one global_load_dwordx4, then four
; buffer_store_dword at byte offsets 4, 0, 12 and 8.
4089define void @memcpy_p5_p4_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
4090; CHECK-LABEL: memcpy_p5_p4_sz16_align_2_2:
4091; CHECK:       ; %bb.0: ; %entry
4092; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4093; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4094; CHECK-NEXT:    s_waitcnt vmcnt(0)
4095; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4096; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4097; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4098; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4099; CHECK-NEXT:    s_setpc_b64 s[30:31]
4100entry:
4101  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
4102  ret void
4103}
4104
; Copies 31 bytes from addrspace(4) %src to addrspace(5) %dst, both align 2.
; Expected gfx1030 output: four loads with no overlap, a dwordx4 at offset 0,
; a dwordx3 at offset 16, a ushort at offset 28 and a ubyte at offset 30,
; followed by dword/short/byte buffer stores at the matching offsets.
4105define void @memcpy_p5_p4_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
4106; CHECK-LABEL: memcpy_p5_p4_sz31_align_2_2:
4107; CHECK:       ; %bb.0: ; %entry
4108; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4109; CHECK-NEXT:    s_clause 0x3
4110; CHECK-NEXT:    global_load_dwordx3 v[5:7], v[1:2], off offset:16
4111; CHECK-NEXT:    global_load_ushort v8, v[1:2], off offset:28
4112; CHECK-NEXT:    global_load_ubyte v9, v[1:2], off offset:30
4113; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4114; CHECK-NEXT:    s_waitcnt vmcnt(3)
4115; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
4116; CHECK-NEXT:    s_waitcnt vmcnt(2)
4117; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
4118; CHECK-NEXT:    s_waitcnt vmcnt(1)
4119; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
4120; CHECK-NEXT:    s_waitcnt vmcnt(0)
4121; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4122; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4123; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4124; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4125; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
4126; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
4127; CHECK-NEXT:    s_setpc_b64 s[30:31]
4128entry:
4129  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
4130  ret void
4131}
4132
; Copies 32 bytes from addrspace(4) %src to addrspace(5) %dst, both align 2.
; Expected gfx1030 output: two global_load_dwordx4 (offsets 0 and 16), then
; eight buffer_store_dword at byte offsets 0 through 28.
4133define void @memcpy_p5_p4_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
4134; CHECK-LABEL: memcpy_p5_p4_sz32_align_2_2:
4135; CHECK:       ; %bb.0: ; %entry
4136; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4137; CHECK-NEXT:    s_clause 0x1
4138; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
4139; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
4140; CHECK-NEXT:    s_waitcnt vmcnt(1)
4141; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4142; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
4143; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4144; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4145; CHECK-NEXT:    s_waitcnt vmcnt(0)
4146; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4147; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4148; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4149; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4150; CHECK-NEXT:    s_setpc_b64 s[30:31]
4151entry:
4152  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
4153  ret void
4154}
4155
; Copies 16 bytes from addrspace(4) %src to addrspace(5) %dst, both align 8.
; Expected gfx1030 output: one global_load_dwordx4, then four
; buffer_store_dword at byte offsets 12, 8, 4 and 0.
4156define void @memcpy_p5_p4_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
4157; CHECK-LABEL: memcpy_p5_p4_sz16_align_8_8:
4158; CHECK:       ; %bb.0: ; %entry
4159; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4160; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4161; CHECK-NEXT:    s_waitcnt vmcnt(0)
4162; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4163; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4164; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4165; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4166; CHECK-NEXT:    s_setpc_b64 s[30:31]
4167entry:
4168  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
4169  ret void
4170}
4171
; Copies 31 bytes from addrspace(4) %src to addrspace(5) %dst, both align 8.
; Expected gfx1030 output: the odd size is covered by a second
; global_load_dwordx4 at offset 15 (overlapping the first load at byte 15),
; with the matching buffer_store_dword group at offsets 15, 19, 23 and 27.
4172define void @memcpy_p5_p4_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
4173; CHECK-LABEL: memcpy_p5_p4_sz31_align_8_8:
4174; CHECK:       ; %bb.0: ; %entry
4175; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4176; CHECK-NEXT:    s_clause 0x1
4177; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
4178; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:15
4179; CHECK-NEXT:    s_waitcnt vmcnt(1)
4180; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4181; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4182; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4183; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
4184; CHECK-NEXT:    s_waitcnt vmcnt(0)
4185; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
4186; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
4187; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
4188; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
4189; CHECK-NEXT:    s_setpc_b64 s[30:31]
4190entry:
4191  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
4192  ret void
4193}
4194
; Copies 32 bytes from addrspace(4) %src to addrspace(5) %dst, both align 8.
; Expected gfx1030 output: two global_load_dwordx4 (offsets 0 and 16), then
; eight buffer_store_dword at byte offsets 0 through 28.
4195define void @memcpy_p5_p4_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
4196; CHECK-LABEL: memcpy_p5_p4_sz32_align_8_8:
4197; CHECK:       ; %bb.0: ; %entry
4198; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4199; CHECK-NEXT:    s_clause 0x1
4200; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
4201; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
4202; CHECK-NEXT:    s_waitcnt vmcnt(1)
4203; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4204; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4205; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4206; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
4207; CHECK-NEXT:    s_waitcnt vmcnt(0)
4208; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4209; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4210; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4211; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4212; CHECK-NEXT:    s_setpc_b64 s[30:31]
4213entry:
4214  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
4215  ret void
4216}
4217
; memcpy of 16 bytes from addrspace(4) to addrspace(5); both pointers are align 16.
; Expected gfx1030 output: a single global_load_dwordx4 followed by four
; buffer_store_dword at offsets 12, 8, 4 and 0.
4218define void @memcpy_p5_p4_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
4219; CHECK-LABEL: memcpy_p5_p4_sz16_align_16_16:
4220; CHECK:       ; %bb.0: ; %entry
4221; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4222; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4223; CHECK-NEXT:    s_waitcnt vmcnt(0)
4224; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4225; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4226; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4227; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4228; CHECK-NEXT:    s_setpc_b64 s[30:31]
4229entry:
4230  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
4231  ret void
4232}
4233
; memcpy of 31 bytes from addrspace(4) to addrspace(5); both pointers are align 16.
; Expected gfx1030 output: two global_load_dwordx4 at offsets 0 and 15, so the
; second 16-byte chunk overlaps the first by one byte (byte 15) instead of using
; narrower tail accesses. The stores mirror this: dwords at 0..12 and at 15..27.
4234define void @memcpy_p5_p4_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
4235; CHECK-LABEL: memcpy_p5_p4_sz31_align_16_16:
4236; CHECK:       ; %bb.0: ; %entry
4237; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4238; CHECK-NEXT:    s_clause 0x1
4239; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
4240; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:15
4241; CHECK-NEXT:    s_waitcnt vmcnt(1)
4242; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4243; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4244; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4245; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
4246; CHECK-NEXT:    s_waitcnt vmcnt(0)
4247; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
4248; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
4249; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
4250; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
4251; CHECK-NEXT:    s_setpc_b64 s[30:31]
4252entry:
4253  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
4254  ret void
4255}
4256
; memcpy of 32 bytes from addrspace(4) to addrspace(5); both pointers are align 16.
; Expected gfx1030 output: two global_load_dwordx4 in one clause (offsets 0 and
; 16), then eight buffer_store_dword covering offsets 0..28.
4257define void @memcpy_p5_p4_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
4258; CHECK-LABEL: memcpy_p5_p4_sz32_align_16_16:
4259; CHECK:       ; %bb.0: ; %entry
4260; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4261; CHECK-NEXT:    s_clause 0x1
4262; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
4263; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
4264; CHECK-NEXT:    s_waitcnt vmcnt(1)
4265; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4266; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4267; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4268; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
4269; CHECK-NEXT:    s_waitcnt vmcnt(0)
4270; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4271; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4272; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4273; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4274; CHECK-NEXT:    s_setpc_b64 s[30:31]
4275entry:
4276  tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
4277  ret void
4278}
4279
; memcpy of 16 bytes from addrspace(5) to addrspace(5); both pointers are align 1.
; Expected gfx1030 output: four buffer_load_dword in one clause, each paired
; with a buffer_store_dword at the same offset. Every store is preceded by its
; own s_waitcnt, counting down from vmcnt(3) to vmcnt(0).
4280define void @memcpy_p5_p5_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
4281; CHECK-LABEL: memcpy_p5_p5_sz16_align_1_1:
4282; CHECK:       ; %bb.0: ; %entry
4283; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4284; CHECK-NEXT:    s_clause 0x3
4285; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:8
4286; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:12
4287; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen
4288; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
4289; CHECK-NEXT:    s_waitcnt vmcnt(3)
4290; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:8
4291; CHECK-NEXT:    s_waitcnt vmcnt(2)
4292; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:12
4293; CHECK-NEXT:    s_waitcnt vmcnt(1)
4294; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen
4295; CHECK-NEXT:    s_waitcnt vmcnt(0)
4296; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
4297; CHECK-NEXT:    s_setpc_b64 s[30:31]
4298entry:
4299  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
4300  ret void
4301}
4302
; memcpy of 31 bytes from addrspace(5) to addrspace(5); both pointers are align 1.
; Expected gfx1030 output: nine loads in one clause -- seven buffer_load_dword
; (offsets 0..24), one buffer_load_ushort at offset 28 and one buffer_load_ubyte
; at offset 30 -- i.e. 28 + 2 + 1 bytes with no overlap. Each load has a
; matching store at the same offset, preceded by its own s_waitcnt.
4303define void @memcpy_p5_p5_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
4304; CHECK-LABEL: memcpy_p5_p5_sz31_align_1_1:
4305; CHECK:       ; %bb.0: ; %entry
4306; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4307; CHECK-NEXT:    s_clause 0x8
4308; CHECK-NEXT:    buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28
4309; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:24
4310; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
4311; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
4312; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
4313; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
4314; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen
4315; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:4
4316; CHECK-NEXT:    buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30
4317; CHECK-NEXT:    s_waitcnt vmcnt(8)
4318; CHECK-NEXT:    buffer_store_short v2, v0, s[0:3], 0 offen offset:28
4319; CHECK-NEXT:    s_waitcnt vmcnt(7)
4320; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:24
4321; CHECK-NEXT:    s_waitcnt vmcnt(6)
4322; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
4323; CHECK-NEXT:    s_waitcnt vmcnt(5)
4324; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
4325; CHECK-NEXT:    s_waitcnt vmcnt(4)
4326; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
4327; CHECK-NEXT:    s_waitcnt vmcnt(3)
4328; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
4329; CHECK-NEXT:    s_waitcnt vmcnt(2)
4330; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen
4331; CHECK-NEXT:    s_waitcnt vmcnt(1)
4332; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:4
4333; CHECK-NEXT:    s_waitcnt vmcnt(0)
4334; CHECK-NEXT:    buffer_store_byte v1, v0, s[0:3], 0 offen offset:30
4335; CHECK-NEXT:    s_setpc_b64 s[30:31]
4336entry:
4337  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
4338  ret void
4339}
4340
; memcpy of 32 bytes from addrspace(5) to addrspace(5); both pointers are align 1.
; Expected gfx1030 output: eight buffer_load_dword in one clause (offsets
; 0..28), each paired with a buffer_store_dword at the same offset. Every store
; is preceded by its own s_waitcnt, counting down from vmcnt(7) to vmcnt(0).
4341define void @memcpy_p5_p5_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
4342; CHECK-LABEL: memcpy_p5_p5_sz32_align_1_1:
4343; CHECK:       ; %bb.0: ; %entry
4344; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4345; CHECK-NEXT:    s_clause 0x7
4346; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:24
4347; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:28
4348; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
4349; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
4350; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
4351; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
4352; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen
4353; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
4354; CHECK-NEXT:    s_waitcnt vmcnt(7)
4355; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:24
4356; CHECK-NEXT:    s_waitcnt vmcnt(6)
4357; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:28
4358; CHECK-NEXT:    s_waitcnt vmcnt(5)
4359; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
4360; CHECK-NEXT:    s_waitcnt vmcnt(4)
4361; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
4362; CHECK-NEXT:    s_waitcnt vmcnt(3)
4363; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
4364; CHECK-NEXT:    s_waitcnt vmcnt(2)
4365; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
4366; CHECK-NEXT:    s_waitcnt vmcnt(1)
4367; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen
4368; CHECK-NEXT:    s_waitcnt vmcnt(0)
4369; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
4370; CHECK-NEXT:    s_setpc_b64 s[30:31]
4371entry:
4372  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
4373  ret void
4374}
4375
; memcpy of 16 bytes from addrspace(5) to addrspace(5); both pointers are align 2.
; Expected gfx1030 output: four buffer_load_dword in one clause, each paired
; with a buffer_store_dword at the same offset. Every store is preceded by its
; own s_waitcnt, counting down from vmcnt(3) to vmcnt(0).
4376define void @memcpy_p5_p5_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
4377; CHECK-LABEL: memcpy_p5_p5_sz16_align_2_2:
4378; CHECK:       ; %bb.0: ; %entry
4379; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4380; CHECK-NEXT:    s_clause 0x3
4381; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:8
4382; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:12
4383; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen
4384; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
4385; CHECK-NEXT:    s_waitcnt vmcnt(3)
4386; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:8
4387; CHECK-NEXT:    s_waitcnt vmcnt(2)
4388; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:12
4389; CHECK-NEXT:    s_waitcnt vmcnt(1)
4390; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen
4391; CHECK-NEXT:    s_waitcnt vmcnt(0)
4392; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
4393; CHECK-NEXT:    s_setpc_b64 s[30:31]
4394entry:
4395  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
4396  ret void
4397}
4398
; memcpy of 31 bytes from addrspace(5) to addrspace(5); both pointers are align 2.
; Expected gfx1030 output: nine loads in one clause -- seven buffer_load_dword
; (offsets 0..24), one buffer_load_ushort at offset 28 and one buffer_load_ubyte
; at offset 30 -- i.e. 28 + 2 + 1 bytes with no overlap. Each load has a
; matching store at the same offset, preceded by its own s_waitcnt.
4399define void @memcpy_p5_p5_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
4400; CHECK-LABEL: memcpy_p5_p5_sz31_align_2_2:
4401; CHECK:       ; %bb.0: ; %entry
4402; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4403; CHECK-NEXT:    s_clause 0x8
4404; CHECK-NEXT:    buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28
4405; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:24
4406; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
4407; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
4408; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
4409; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
4410; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen
4411; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:4
4412; CHECK-NEXT:    buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30
4413; CHECK-NEXT:    s_waitcnt vmcnt(8)
4414; CHECK-NEXT:    buffer_store_short v2, v0, s[0:3], 0 offen offset:28
4415; CHECK-NEXT:    s_waitcnt vmcnt(7)
4416; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:24
4417; CHECK-NEXT:    s_waitcnt vmcnt(6)
4418; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
4419; CHECK-NEXT:    s_waitcnt vmcnt(5)
4420; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
4421; CHECK-NEXT:    s_waitcnt vmcnt(4)
4422; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
4423; CHECK-NEXT:    s_waitcnt vmcnt(3)
4424; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
4425; CHECK-NEXT:    s_waitcnt vmcnt(2)
4426; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen
4427; CHECK-NEXT:    s_waitcnt vmcnt(1)
4428; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:4
4429; CHECK-NEXT:    s_waitcnt vmcnt(0)
4430; CHECK-NEXT:    buffer_store_byte v1, v0, s[0:3], 0 offen offset:30
4431; CHECK-NEXT:    s_setpc_b64 s[30:31]
4432entry:
4433  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
4434  ret void
4435}
4436
; memcpy of 32 bytes from addrspace(5) to addrspace(5); both pointers are align 2.
; Expected gfx1030 output: eight buffer_load_dword in one clause (offsets
; 0..28), each paired with a buffer_store_dword at the same offset. Every store
; is preceded by its own s_waitcnt, counting down from vmcnt(7) to vmcnt(0).
4437define void @memcpy_p5_p5_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
4438; CHECK-LABEL: memcpy_p5_p5_sz32_align_2_2:
4439; CHECK:       ; %bb.0: ; %entry
4440; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4441; CHECK-NEXT:    s_clause 0x7
4442; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:24
4443; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:28
4444; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
4445; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
4446; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
4447; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
4448; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen
4449; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
4450; CHECK-NEXT:    s_waitcnt vmcnt(7)
4451; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:24
4452; CHECK-NEXT:    s_waitcnt vmcnt(6)
4453; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:28
4454; CHECK-NEXT:    s_waitcnt vmcnt(5)
4455; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
4456; CHECK-NEXT:    s_waitcnt vmcnt(4)
4457; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
4458; CHECK-NEXT:    s_waitcnt vmcnt(3)
4459; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
4460; CHECK-NEXT:    s_waitcnt vmcnt(2)
4461; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
4462; CHECK-NEXT:    s_waitcnt vmcnt(1)
4463; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen
4464; CHECK-NEXT:    s_waitcnt vmcnt(0)
4465; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
4466; CHECK-NEXT:    s_setpc_b64 s[30:31]
4467entry:
4468  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
4469  ret void
4470}
4471
; memcpy of 16 bytes from addrspace(5) to addrspace(5); both pointers are align 8.
; Expected gfx1030 output: four buffer_load_dword in one clause at ascending
; offsets 0, 4, 8, 12, each paired with a buffer_store_dword at the same
; offset. Every store is preceded by its own s_waitcnt (vmcnt(3) down to 0).
4472define void @memcpy_p5_p5_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
4473; CHECK-LABEL: memcpy_p5_p5_sz16_align_8_8:
4474; CHECK:       ; %bb.0: ; %entry
4475; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4476; CHECK-NEXT:    s_clause 0x3
4477; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
4478; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
4479; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
4480; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4481; CHECK-NEXT:    s_waitcnt vmcnt(3)
4482; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
4483; CHECK-NEXT:    s_waitcnt vmcnt(2)
4484; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4485; CHECK-NEXT:    s_waitcnt vmcnt(1)
4486; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4487; CHECK-NEXT:    s_waitcnt vmcnt(0)
4488; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4489; CHECK-NEXT:    s_setpc_b64 s[30:31]
4490entry:
4491  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
4492  ret void
4493}
4494
; memcpy of 31 bytes from addrspace(5) to addrspace(5); both pointers are align 8.
; Expected gfx1030 output: eight buffer_load_dword in one clause, split into a
; group at offsets 15, 19, 23, 27 and a group at offsets 0, 4, 8, 12. The two
; 16-byte groups overlap at byte 15 rather than using narrower tail accesses.
; Each load has a matching store at the same offset with its own s_waitcnt.
4495define void @memcpy_p5_p5_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
4496; CHECK-LABEL: memcpy_p5_p5_sz31_align_8_8:
4497; CHECK:       ; %bb.0: ; %entry
4498; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4499; CHECK-NEXT:    s_clause 0x7
4500; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
4501; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
4502; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
4503; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
4504; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen
4505; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
4506; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
4507; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4508; CHECK-NEXT:    s_waitcnt vmcnt(7)
4509; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:15
4510; CHECK-NEXT:    s_waitcnt vmcnt(6)
4511; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:19
4512; CHECK-NEXT:    s_waitcnt vmcnt(5)
4513; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:23
4514; CHECK-NEXT:    s_waitcnt vmcnt(4)
4515; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:27
4516; CHECK-NEXT:    s_waitcnt vmcnt(3)
4517; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen
4518; CHECK-NEXT:    s_waitcnt vmcnt(2)
4519; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
4520; CHECK-NEXT:    s_waitcnt vmcnt(1)
4521; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
4522; CHECK-NEXT:    s_waitcnt vmcnt(0)
4523; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4524; CHECK-NEXT:    s_setpc_b64 s[30:31]
4525entry:
4526  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
4527  ret void
4528}
4529
; memcpy of 32 bytes from addrspace(5) to addrspace(5); both pointers are align 8.
; Expected gfx1030 output: eight buffer_load_dword in one clause, the group at
; offsets 16..28 first and then the group at offsets 0..12, each paired with a
; buffer_store_dword at the same offset and preceded by its own s_waitcnt.
4530define void @memcpy_p5_p5_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
4531; CHECK-LABEL: memcpy_p5_p5_sz32_align_8_8:
4532; CHECK:       ; %bb.0: ; %entry
4533; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4534; CHECK-NEXT:    s_clause 0x7
4535; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:16
4536; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:20
4537; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:24
4538; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:28
4539; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen
4540; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
4541; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
4542; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4543; CHECK-NEXT:    s_waitcnt vmcnt(7)
4544; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:16
4545; CHECK-NEXT:    s_waitcnt vmcnt(6)
4546; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:20
4547; CHECK-NEXT:    s_waitcnt vmcnt(5)
4548; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:24
4549; CHECK-NEXT:    s_waitcnt vmcnt(4)
4550; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:28
4551; CHECK-NEXT:    s_waitcnt vmcnt(3)
4552; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen
4553; CHECK-NEXT:    s_waitcnt vmcnt(2)
4554; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
4555; CHECK-NEXT:    s_waitcnt vmcnt(1)
4556; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
4557; CHECK-NEXT:    s_waitcnt vmcnt(0)
4558; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4559; CHECK-NEXT:    s_setpc_b64 s[30:31]
4560entry:
4561  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
4562  ret void
4563}
4564
; memcpy of 16 bytes from addrspace(5) to addrspace(5); both pointers are align 16.
; Expected gfx1030 output: four buffer_load_dword in one clause at ascending
; offsets 0, 4, 8, 12, each paired with a buffer_store_dword at the same
; offset. Every store is preceded by its own s_waitcnt (vmcnt(3) down to 0).
4565define void @memcpy_p5_p5_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
4566; CHECK-LABEL: memcpy_p5_p5_sz16_align_16_16:
4567; CHECK:       ; %bb.0: ; %entry
4568; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4569; CHECK-NEXT:    s_clause 0x3
4570; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
4571; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
4572; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
4573; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4574; CHECK-NEXT:    s_waitcnt vmcnt(3)
4575; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
4576; CHECK-NEXT:    s_waitcnt vmcnt(2)
4577; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4578; CHECK-NEXT:    s_waitcnt vmcnt(1)
4579; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4580; CHECK-NEXT:    s_waitcnt vmcnt(0)
4581; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4582; CHECK-NEXT:    s_setpc_b64 s[30:31]
4583entry:
4584  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
4585  ret void
4586}
4587
; memcpy of 31 bytes from addrspace(5) to addrspace(5); both pointers are align 16.
; Expected gfx1030 output: eight buffer_load_dword in one clause, split into a
; group at offsets 15, 19, 23, 27 and a group at offsets 0, 4, 8, 12. The two
; 16-byte groups overlap at byte 15 rather than using narrower tail accesses.
; Each load has a matching store at the same offset with its own s_waitcnt.
4588define void @memcpy_p5_p5_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
4589; CHECK-LABEL: memcpy_p5_p5_sz31_align_16_16:
4590; CHECK:       ; %bb.0: ; %entry
4591; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4592; CHECK-NEXT:    s_clause 0x7
4593; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
4594; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
4595; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
4596; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
4597; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen
4598; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
4599; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
4600; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4601; CHECK-NEXT:    s_waitcnt vmcnt(7)
4602; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:15
4603; CHECK-NEXT:    s_waitcnt vmcnt(6)
4604; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:19
4605; CHECK-NEXT:    s_waitcnt vmcnt(5)
4606; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:23
4607; CHECK-NEXT:    s_waitcnt vmcnt(4)
4608; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:27
4609; CHECK-NEXT:    s_waitcnt vmcnt(3)
4610; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen
4611; CHECK-NEXT:    s_waitcnt vmcnt(2)
4612; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
4613; CHECK-NEXT:    s_waitcnt vmcnt(1)
4614; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
4615; CHECK-NEXT:    s_waitcnt vmcnt(0)
4616; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4617; CHECK-NEXT:    s_setpc_b64 s[30:31]
4618entry:
4619  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
4620  ret void
4621}
4622
; memcpy of 32 bytes from addrspace(5) to addrspace(5); both pointers are align 16.
; Expected gfx1030 output: eight buffer_load_dword in one clause, the group at
; offsets 16..28 first and then the group at offsets 0..12, each paired with a
; buffer_store_dword at the same offset and preceded by its own s_waitcnt.
4623define void @memcpy_p5_p5_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
4624; CHECK-LABEL: memcpy_p5_p5_sz32_align_16_16:
4625; CHECK:       ; %bb.0: ; %entry
4626; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4627; CHECK-NEXT:    s_clause 0x7
4628; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:16
4629; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:20
4630; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:24
4631; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:28
4632; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen
4633; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
4634; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
4635; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4636; CHECK-NEXT:    s_waitcnt vmcnt(7)
4637; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:16
4638; CHECK-NEXT:    s_waitcnt vmcnt(6)
4639; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:20
4640; CHECK-NEXT:    s_waitcnt vmcnt(5)
4641; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:24
4642; CHECK-NEXT:    s_waitcnt vmcnt(4)
4643; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:28
4644; CHECK-NEXT:    s_waitcnt vmcnt(3)
4645; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen
4646; CHECK-NEXT:    s_waitcnt vmcnt(2)
4647; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
4648; CHECK-NEXT:    s_waitcnt vmcnt(1)
4649; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
4650; CHECK-NEXT:    s_waitcnt vmcnt(0)
4651; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4652; CHECK-NEXT:    s_setpc_b64 s[30:31]
4653entry:
4654  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
4655  ret void
4656}
4657
; Declarations of the llvm.memcpy overloads, one per destination/source
; address-space pair (destinations 0, 1, 3, 5; sources 0, 1, 3, 4, 5). Every
; declaration references attribute group #2, so the group defined at the end
; of this list must carry that same number; a definition under any other
; number is never attached to these declarations.
4658declare void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
4659declare void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
4660declare void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
4661declare void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
4662declare void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
4663declare void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
4664declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
4665declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
4666declare void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
4667declare void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
4668declare void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
4669declare void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
4670declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
4671declare void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
4672declare void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
4673declare void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
4674declare void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
4675declare void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
4676declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
4677declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
4678
4679attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
4680
4681