xref: /llvm-project/llvm/test/CodeGen/AMDGPU/memmove-param-combinations.ll (revision 173c68239d1d11f4e36c8af07a28310da67568a7)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 %s -o - | FileCheck %s
4
5; Testing codegen for memmove with vector operands for all combinations of the following parameters:
6;     destination address space: 0, 1, 3, 5
7;     source address space: 0, 1, 3, 4, 5
8;     alignment: 1, 2, 8, 16
9;     sizes: 16, 31, 32
10
11
; 16-byte memmove (isvolatile = false) from addrspace(0) %src to addrspace(0) %dst, both
; pointers align 1. Expected gfx1030 code: one flat_load_dwordx4, then one flat_store_dwordx4.
12define void @memmove_p0_p0_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
13; CHECK-LABEL: memmove_p0_p0_sz16_align_1_1:
14; CHECK:       ; %bb.0: ; %entry
15; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
17; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
18; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
19; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
20; CHECK-NEXT:    s_setpc_b64 s[30:31]
21entry:
22  tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
23  ret void
24}
25
; 31-byte memmove (isvolatile = false) from addrspace(0) %src to addrspace(0) %dst, both
; pointers align 1. Expected gfx1030 code: four flat loads (ubyte @30, ushort @28, dwordx3 @16,
; dwordx4 @0) are all issued before the four matching flat stores.
26define void @memmove_p0_p0_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
27; CHECK-LABEL: memmove_p0_p0_sz31_align_1_1:
28; CHECK:       ; %bb.0: ; %entry
29; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30; CHECK-NEXT:    s_clause 0x3
31; CHECK-NEXT:    flat_load_ubyte v9, v[2:3] offset:30
32; CHECK-NEXT:    flat_load_ushort v10, v[2:3] offset:28
33; CHECK-NEXT:    flat_load_dwordx3 v[6:8], v[2:3] offset:16
34; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
35; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
36; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
37; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(3)
38; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
39; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(3)
40; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
41; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(3)
42; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
43; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
44; CHECK-NEXT:    s_setpc_b64 s[30:31]
45entry:
46  tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
47  ret void
48}
49
; 32-byte memmove (isvolatile = false) from addrspace(0) %src to addrspace(0) %dst, both
; pointers align 1. Expected gfx1030 code: two flat_load_dwordx4 (offsets 16 and 0) are issued
; before the two flat_store_dwordx4.
50define void @memmove_p0_p0_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
51; CHECK-LABEL: memmove_p0_p0_sz32_align_1_1:
52; CHECK:       ; %bb.0: ; %entry
53; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54; CHECK-NEXT:    s_clause 0x1
55; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
56; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
57; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
58; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
59; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
60; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
61; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
62; CHECK-NEXT:    s_setpc_b64 s[30:31]
63entry:
64  tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
65  ret void
66}
67
; 16-byte memmove (isvolatile = false) from addrspace(0) %src to addrspace(0) %dst, both
; pointers align 2. Expected gfx1030 code: one flat_load_dwordx4, then one flat_store_dwordx4.
68define void @memmove_p0_p0_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
69; CHECK-LABEL: memmove_p0_p0_sz16_align_2_2:
70; CHECK:       ; %bb.0: ; %entry
71; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
73; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
74; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
75; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
76; CHECK-NEXT:    s_setpc_b64 s[30:31]
77entry:
78  tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
79  ret void
80}
81
; 31-byte memmove (isvolatile = false) from addrspace(0) %src to addrspace(0) %dst, both
; pointers align 2. Expected gfx1030 code: four flat loads (ubyte @30, ushort @28, dwordx3 @16,
; dwordx4 @0) are all issued before the four matching flat stores.
82define void @memmove_p0_p0_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
83; CHECK-LABEL: memmove_p0_p0_sz31_align_2_2:
84; CHECK:       ; %bb.0: ; %entry
85; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86; CHECK-NEXT:    s_clause 0x3
87; CHECK-NEXT:    flat_load_ubyte v9, v[2:3] offset:30
88; CHECK-NEXT:    flat_load_ushort v10, v[2:3] offset:28
89; CHECK-NEXT:    flat_load_dwordx3 v[6:8], v[2:3] offset:16
90; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
91; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
92; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
93; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(3)
94; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
95; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(3)
96; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
97; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(3)
98; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
99; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
100; CHECK-NEXT:    s_setpc_b64 s[30:31]
101entry:
102  tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
103  ret void
104}
105
; 32-byte memmove (isvolatile = false) from addrspace(0) %src to addrspace(0) %dst, both
; pointers align 2. Expected gfx1030 code: two flat_load_dwordx4 (offsets 16 and 0) are issued
; before the two flat_store_dwordx4.
106define void @memmove_p0_p0_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
107; CHECK-LABEL: memmove_p0_p0_sz32_align_2_2:
108; CHECK:       ; %bb.0: ; %entry
109; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110; CHECK-NEXT:    s_clause 0x1
111; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
112; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
113; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
114; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
115; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
116; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
117; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
118; CHECK-NEXT:    s_setpc_b64 s[30:31]
119entry:
120  tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
121  ret void
122}
123
; 16-byte memmove (isvolatile = false) from addrspace(0) %src to addrspace(0) %dst, both
; pointers align 8. Expected gfx1030 code: one flat_load_dwordx4, then one flat_store_dwordx4.
124define void @memmove_p0_p0_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
125; CHECK-LABEL: memmove_p0_p0_sz16_align_8_8:
126; CHECK:       ; %bb.0: ; %entry
127; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
129; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
130; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
131; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
132; CHECK-NEXT:    s_setpc_b64 s[30:31]
133entry:
134  tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
135  ret void
136}
137
; 31-byte memmove (isvolatile = false) from addrspace(0) %src to addrspace(0) %dst, both
; pointers align 8. Expected gfx1030 code: bytes 0..15 move as one dwordx4; the rest is stored
; as dword @16, dwordx2 @20, short @28 and byte @30.
; Unlike the align 1/2 variants, the dword at offset 20 is loaded after the dword at offset 16
; has already been stored.
; NOTE(review): presumably safe because both pointers are 8-byte aligned, so a 4-byte store at
; dst+16 cannot overlap a 4-byte load at src+20 -- confirm before relying on this ordering.
138define void @memmove_p0_p0_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
139; CHECK-LABEL: memmove_p0_p0_sz31_align_8_8:
140; CHECK:       ; %bb.0: ; %entry
141; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142; CHECK-NEXT:    s_clause 0x4
143; CHECK-NEXT:    flat_load_dword v8, v[2:3] offset:16
144; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3]
145; CHECK-NEXT:    flat_load_ushort v10, v[2:3] offset:28
146; CHECK-NEXT:    flat_load_dword v9, v[2:3] offset:24
147; CHECK-NEXT:    flat_load_ubyte v11, v[2:3] offset:30
148; CHECK-NEXT:    s_waitcnt vmcnt(4) lgkmcnt(4)
149; CHECK-NEXT:    flat_store_dword v[0:1], v8 offset:16
150; CHECK-NEXT:    flat_load_dword v8, v[2:3] offset:20
151; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(2)
152; CHECK-NEXT:    flat_store_byte v[0:1], v11 offset:30
153; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
154; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(2)
155; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[8:9] offset:20
156; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
157; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
158; CHECK-NEXT:    s_setpc_b64 s[30:31]
159entry:
160  tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
161  ret void
162}
163
; 32-byte memmove (isvolatile = false) from addrspace(0) %src to addrspace(0) %dst, both
; pointers align 8. Expected gfx1030 code: two flat_load_dwordx4 (offsets 16 and 0) are issued
; before the two flat_store_dwordx4.
164define void @memmove_p0_p0_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
165; CHECK-LABEL: memmove_p0_p0_sz32_align_8_8:
166; CHECK:       ; %bb.0: ; %entry
167; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168; CHECK-NEXT:    s_clause 0x1
169; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
170; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
171; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
172; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
173; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
174; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
175; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
176; CHECK-NEXT:    s_setpc_b64 s[30:31]
177entry:
178  tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
179  ret void
180}
181
; 16-byte memmove (isvolatile = false) from addrspace(0) %src to addrspace(0) %dst, both
; pointers align 16. Expected gfx1030 code: one flat_load_dwordx4, then one flat_store_dwordx4.
182define void @memmove_p0_p0_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
183; CHECK-LABEL: memmove_p0_p0_sz16_align_16_16:
184; CHECK:       ; %bb.0: ; %entry
185; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
187; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
188; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
189; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
190; CHECK-NEXT:    s_setpc_b64 s[30:31]
191entry:
192  tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
193  ret void
194}
195
; 31-byte memmove (isvolatile = false) from addrspace(0) %src to addrspace(0) %dst, both
; pointers align 16. Expected gfx1030 code: bytes 0..15 move as one dwordx4; the rest is stored
; as three separate dwords (@16, @20, @24), a short @28 and a byte @30.
; The dwords at offsets 20 and 24 are each loaded after an earlier dword has already been stored.
; NOTE(review): presumably safe because both pointers are 16-byte aligned, so those 4-byte
; stores and later 4-byte loads cannot overlap -- confirm before relying on this ordering.
196define void @memmove_p0_p0_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
197; CHECK-LABEL: memmove_p0_p0_sz31_align_16_16:
198; CHECK:       ; %bb.0: ; %entry
199; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200; CHECK-NEXT:    s_clause 0x3
201; CHECK-NEXT:    flat_load_ubyte v8, v[2:3] offset:30
202; CHECK-NEXT:    flat_load_dword v9, v[2:3] offset:16
203; CHECK-NEXT:    flat_load_ushort v10, v[2:3] offset:28
204; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3]
205; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
206; CHECK-NEXT:    flat_store_dword v[0:1], v9 offset:16
207; CHECK-NEXT:    flat_load_dword v9, v[2:3] offset:20
208; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
209; CHECK-NEXT:    flat_store_dword v[0:1], v9 offset:20
210; CHECK-NEXT:    flat_load_dword v2, v[2:3] offset:24
211; CHECK-NEXT:    flat_store_byte v[0:1], v8 offset:30
212; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
213; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(2)
214; CHECK-NEXT:    flat_store_dword v[0:1], v2 offset:24
215; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
216; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
217; CHECK-NEXT:    s_setpc_b64 s[30:31]
218entry:
219  tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
220  ret void
221}
222
; 32-byte memmove (isvolatile = false) from addrspace(0) %src to addrspace(0) %dst, both
; pointers align 16. Expected gfx1030 code: two flat_load_dwordx4 (offsets 16 and 0) are issued
; before the two flat_store_dwordx4.
223define void @memmove_p0_p0_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
224; CHECK-LABEL: memmove_p0_p0_sz32_align_16_16:
225; CHECK:       ; %bb.0: ; %entry
226; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
227; CHECK-NEXT:    s_clause 0x1
228; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
229; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
230; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
231; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
232; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
233; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
234; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
235; CHECK-NEXT:    s_setpc_b64 s[30:31]
236entry:
237  tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
238  ret void
239}
240
; 16-byte memmove (isvolatile = false) from addrspace(1) %src to addrspace(0) %dst, both
; pointers align 1. Expected gfx1030 code: one global_load_dwordx4, then one flat_store_dwordx4.
241define void @memmove_p0_p1_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
242; CHECK-LABEL: memmove_p0_p1_sz16_align_1_1:
243; CHECK:       ; %bb.0: ; %entry
244; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
246; CHECK-NEXT:    s_waitcnt vmcnt(0)
247; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
248; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
249; CHECK-NEXT:    s_setpc_b64 s[30:31]
250entry:
251  tail call void @llvm.memmove.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
252  ret void
253}
254
; 31-byte memmove (isvolatile = false) from addrspace(1) %src to addrspace(0) %dst, both
; pointers align 1. Expected gfx1030 code: four global loads (ubyte @30, ushort @28,
; dwordx3 @16, dwordx4 @0) are all issued before the four matching flat stores.
255define void @memmove_p0_p1_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
256; CHECK-LABEL: memmove_p0_p1_sz31_align_1_1:
257; CHECK:       ; %bb.0: ; %entry
258; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
259; CHECK-NEXT:    s_clause 0x3
260; CHECK-NEXT:    global_load_ubyte v9, v[2:3], off offset:30
261; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
262; CHECK-NEXT:    global_load_dwordx3 v[6:8], v[2:3], off offset:16
263; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
264; CHECK-NEXT:    s_waitcnt vmcnt(3)
265; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
266; CHECK-NEXT:    s_waitcnt vmcnt(2)
267; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
268; CHECK-NEXT:    s_waitcnt vmcnt(1)
269; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
270; CHECK-NEXT:    s_waitcnt vmcnt(0)
271; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
272; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
273; CHECK-NEXT:    s_setpc_b64 s[30:31]
274entry:
275  tail call void @llvm.memmove.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
276  ret void
277}
278
; 32-byte memmove (isvolatile = false) from addrspace(1) %src to addrspace(0) %dst, both
; pointers align 1. Expected gfx1030 code: two global_load_dwordx4 (offsets 16 and 0) are
; issued before the two flat_store_dwordx4.
279define void @memmove_p0_p1_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
280; CHECK-LABEL: memmove_p0_p1_sz32_align_1_1:
281; CHECK:       ; %bb.0: ; %entry
282; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283; CHECK-NEXT:    s_clause 0x1
284; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
285; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
286; CHECK-NEXT:    s_waitcnt vmcnt(1)
287; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
288; CHECK-NEXT:    s_waitcnt vmcnt(0)
289; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
290; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
291; CHECK-NEXT:    s_setpc_b64 s[30:31]
292entry:
293  tail call void @llvm.memmove.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
294  ret void
295}
296
; 16-byte memmove (isvolatile = false) from addrspace(1) %src to addrspace(0) %dst, both
; pointers align 2. Expected gfx1030 code: one global_load_dwordx4, then one flat_store_dwordx4.
297define void @memmove_p0_p1_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
298; CHECK-LABEL: memmove_p0_p1_sz16_align_2_2:
299; CHECK:       ; %bb.0: ; %entry
300; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
302; CHECK-NEXT:    s_waitcnt vmcnt(0)
303; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
304; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
305; CHECK-NEXT:    s_setpc_b64 s[30:31]
306entry:
307  tail call void @llvm.memmove.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
308  ret void
309}
310
; 31-byte memmove (isvolatile = false) from addrspace(1) %src to addrspace(0) %dst, both
; pointers align 2. Expected gfx1030 code: four global loads (ubyte @30, ushort @28,
; dwordx3 @16, dwordx4 @0) are all issued before the four matching flat stores.
311define void @memmove_p0_p1_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
312; CHECK-LABEL: memmove_p0_p1_sz31_align_2_2:
313; CHECK:       ; %bb.0: ; %entry
314; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
315; CHECK-NEXT:    s_clause 0x3
316; CHECK-NEXT:    global_load_ubyte v9, v[2:3], off offset:30
317; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
318; CHECK-NEXT:    global_load_dwordx3 v[6:8], v[2:3], off offset:16
319; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
320; CHECK-NEXT:    s_waitcnt vmcnt(3)
321; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
322; CHECK-NEXT:    s_waitcnt vmcnt(2)
323; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
324; CHECK-NEXT:    s_waitcnt vmcnt(1)
325; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
326; CHECK-NEXT:    s_waitcnt vmcnt(0)
327; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
328; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
329; CHECK-NEXT:    s_setpc_b64 s[30:31]
330entry:
331  tail call void @llvm.memmove.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
332  ret void
333}
334
; 32-byte memmove (isvolatile = false) from addrspace(1) %src to addrspace(0) %dst, both
; pointers align 2. Expected gfx1030 code: two global_load_dwordx4 (offsets 16 and 0) are
; issued before the two flat_store_dwordx4.
335define void @memmove_p0_p1_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
336; CHECK-LABEL: memmove_p0_p1_sz32_align_2_2:
337; CHECK:       ; %bb.0: ; %entry
338; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339; CHECK-NEXT:    s_clause 0x1
340; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
341; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
342; CHECK-NEXT:    s_waitcnt vmcnt(1)
343; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
344; CHECK-NEXT:    s_waitcnt vmcnt(0)
345; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
346; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
347; CHECK-NEXT:    s_setpc_b64 s[30:31]
348entry:
349  tail call void @llvm.memmove.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
350  ret void
351}
352
; 16-byte memmove (isvolatile = false) from addrspace(1) %src to addrspace(0) %dst, both
; pointers align 8. Expected gfx1030 code: one global_load_dwordx4, then one flat_store_dwordx4.
353define void @memmove_p0_p1_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
354; CHECK-LABEL: memmove_p0_p1_sz16_align_8_8:
355; CHECK:       ; %bb.0: ; %entry
356; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
358; CHECK-NEXT:    s_waitcnt vmcnt(0)
359; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
360; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
361; CHECK-NEXT:    s_setpc_b64 s[30:31]
362entry:
363  tail call void @llvm.memmove.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
364  ret void
365}
366
; 31-byte memmove (isvolatile = false) from addrspace(1) %src to addrspace(0) %dst, both
; pointers align 8. Expected gfx1030 code: bytes 0..15 move as one dwordx4; the rest is stored
; as dword @16, dwordx2 @20, short @28 and byte @30.
; Unlike the align 1/2 variants, the dword at offset 20 is loaded after the dword at offset 16
; has already been stored.
; NOTE(review): presumably safe because both pointers are 8-byte aligned, so a 4-byte store at
; dst+16 cannot overlap a 4-byte load at src+20 -- confirm before relying on this ordering.
367define void @memmove_p0_p1_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
368; CHECK-LABEL: memmove_p0_p1_sz31_align_8_8:
369; CHECK:       ; %bb.0: ; %entry
370; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371; CHECK-NEXT:    s_clause 0x4
372; CHECK-NEXT:    global_load_dword v8, v[2:3], off offset:16
373; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
374; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
375; CHECK-NEXT:    global_load_dword v9, v[2:3], off offset:24
376; CHECK-NEXT:    global_load_ubyte v11, v[2:3], off offset:30
377; CHECK-NEXT:    s_waitcnt vmcnt(4)
378; CHECK-NEXT:    flat_store_dword v[0:1], v8 offset:16
379; CHECK-NEXT:    global_load_dword v8, v[2:3], off offset:20
380; CHECK-NEXT:    s_waitcnt vmcnt(1)
381; CHECK-NEXT:    flat_store_byte v[0:1], v11 offset:30
382; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
383; CHECK-NEXT:    s_waitcnt vmcnt(0)
384; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[8:9] offset:20
385; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
386; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
387; CHECK-NEXT:    s_setpc_b64 s[30:31]
388entry:
389  tail call void @llvm.memmove.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
390  ret void
391}
392
; 32-byte memmove (isvolatile = false) from addrspace(1) %src to addrspace(0) %dst, both
; pointers align 8. Expected gfx1030 code: two global_load_dwordx4 (offsets 16 and 0) are
; issued before the two flat_store_dwordx4.
393define void @memmove_p0_p1_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
394; CHECK-LABEL: memmove_p0_p1_sz32_align_8_8:
395; CHECK:       ; %bb.0: ; %entry
396; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
397; CHECK-NEXT:    s_clause 0x1
398; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
399; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
400; CHECK-NEXT:    s_waitcnt vmcnt(1)
401; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
402; CHECK-NEXT:    s_waitcnt vmcnt(0)
403; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
404; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
405; CHECK-NEXT:    s_setpc_b64 s[30:31]
406entry:
407  tail call void @llvm.memmove.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
408  ret void
409}
410
; 16-byte memmove (isvolatile = false) from addrspace(1) %src to addrspace(0) %dst, both
; pointers align 16. Expected gfx1030 code: one global_load_dwordx4, then one flat_store_dwordx4.
411define void @memmove_p0_p1_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
412; CHECK-LABEL: memmove_p0_p1_sz16_align_16_16:
413; CHECK:       ; %bb.0: ; %entry
414; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
416; CHECK-NEXT:    s_waitcnt vmcnt(0)
417; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
418; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
419; CHECK-NEXT:    s_setpc_b64 s[30:31]
420entry:
421  tail call void @llvm.memmove.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
422  ret void
423}
424
; 31-byte memmove (isvolatile = false) from addrspace(1) %src to addrspace(0) %dst, both
; pointers align 16. Expected gfx1030 code: bytes 0..15 move as one dwordx4; the rest is stored
; as three separate dwords (@16, @20, @24), a short @28 and a byte @30.
; The dwords at offsets 20 and 24 are each loaded after an earlier dword has already been stored.
; NOTE(review): presumably safe because both pointers are 16-byte aligned, so those 4-byte
; stores and later 4-byte loads cannot overlap -- confirm before relying on this ordering.
425define void @memmove_p0_p1_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
426; CHECK-LABEL: memmove_p0_p1_sz31_align_16_16:
427; CHECK:       ; %bb.0: ; %entry
428; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
429; CHECK-NEXT:    s_clause 0x3
430; CHECK-NEXT:    global_load_ubyte v8, v[2:3], off offset:30
431; CHECK-NEXT:    global_load_dword v9, v[2:3], off offset:16
432; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
433; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
434; CHECK-NEXT:    s_waitcnt vmcnt(2)
435; CHECK-NEXT:    flat_store_dword v[0:1], v9 offset:16
436; CHECK-NEXT:    global_load_dword v9, v[2:3], off offset:20
437; CHECK-NEXT:    s_waitcnt vmcnt(0)
438; CHECK-NEXT:    flat_store_dword v[0:1], v9 offset:20
439; CHECK-NEXT:    global_load_dword v2, v[2:3], off offset:24
440; CHECK-NEXT:    flat_store_byte v[0:1], v8 offset:30
441; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
442; CHECK-NEXT:    s_waitcnt vmcnt(0)
443; CHECK-NEXT:    flat_store_dword v[0:1], v2 offset:24
444; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
445; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
446; CHECK-NEXT:    s_setpc_b64 s[30:31]
447entry:
448  tail call void @llvm.memmove.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
449  ret void
450}
451
; 32-byte memmove (isvolatile = false) from addrspace(1) %src to addrspace(0) %dst, both
; pointers align 16. Expected gfx1030 code: two global_load_dwordx4 (offsets 16 and 0) are
; issued before the two flat_store_dwordx4.
452define void @memmove_p0_p1_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
453; CHECK-LABEL: memmove_p0_p1_sz32_align_16_16:
454; CHECK:       ; %bb.0: ; %entry
455; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
456; CHECK-NEXT:    s_clause 0x1
457; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
458; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
459; CHECK-NEXT:    s_waitcnt vmcnt(1)
460; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
461; CHECK-NEXT:    s_waitcnt vmcnt(0)
462; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
463; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
464; CHECK-NEXT:    s_setpc_b64 s[30:31]
465entry:
466  tail call void @llvm.memmove.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
467  ret void
468}
469
; 16-byte memmove (isvolatile = false) from addrspace(3) %src to addrspace(0) %dst, both
; pointers align 1. Expected gfx1030 code: one ds_read2_b64, then one flat_store_dwordx4.
470define void @memmove_p0_p3_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
471; CHECK-LABEL: memmove_p0_p3_sz16_align_1_1:
472; CHECK:       ; %bb.0: ; %entry
473; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
474; CHECK-NEXT:    ds_read2_b64 v[2:5], v2 offset1:1
475; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
476; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
477; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
478; CHECK-NEXT:    s_setpc_b64 s[30:31]
479entry:
480  tail call void @llvm.memmove.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
481  ret void
482}
483
; 31-byte memmove (isvolatile = false) from addrspace(3) %src to addrspace(0) %dst, both
; pointers align 1. Expected gfx1030 code: five ds_read* loads (u8 @30, b32 @24, u16 @28,
; b64 @16 and a ds_read2_b64 for the first 16 bytes) are all issued before the four flat stores
; (byte @30, short @28, dwordx3 @16, dwordx4 @0).
484define void @memmove_p0_p3_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
485; CHECK-LABEL: memmove_p0_p3_sz31_align_1_1:
486; CHECK:       ; %bb.0: ; %entry
487; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488; CHECK-NEXT:    ds_read_u8 v9, v2 offset:30
489; CHECK-NEXT:    ds_read_b32 v8, v2 offset:24
490; CHECK-NEXT:    ds_read_u16 v10, v2 offset:28
491; CHECK-NEXT:    ds_read_b64 v[6:7], v2 offset:16
492; CHECK-NEXT:    ds_read2_b64 v[2:5], v2 offset1:1
493; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
494; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
495; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
496; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
497; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
498; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
499; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
500; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
501; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
502; CHECK-NEXT:    s_setpc_b64 s[30:31]
503entry:
504  tail call void @llvm.memmove.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
505  ret void
506}
507
; 32-byte memmove (isvolatile = false) from addrspace(3) %src to addrspace(0) %dst, both
; pointers align 1. Expected gfx1030 code: two ds_read2_b64 loads are issued before the two
; flat_store_dwordx4 (offset 16 first, then offset 0).
508define void @memmove_p0_p3_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
509; CHECK-LABEL: memmove_p0_p3_sz32_align_1_1:
510; CHECK:       ; %bb.0: ; %entry
511; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512; CHECK-NEXT:    ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
513; CHECK-NEXT:    ds_read2_b64 v[7:10], v2 offset1:1
514; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
515; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:16
516; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
517; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
518; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
519; CHECK-NEXT:    s_setpc_b64 s[30:31]
520entry:
521  tail call void @llvm.memmove.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
522  ret void
523}
524
; 16-byte memmove (isvolatile = false) from addrspace(3) %src to addrspace(0) %dst, both
; pointers align 2. Expected gfx1030 code: one ds_read2_b64, then one flat_store_dwordx4.
525define void @memmove_p0_p3_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
526; CHECK-LABEL: memmove_p0_p3_sz16_align_2_2:
527; CHECK:       ; %bb.0: ; %entry
528; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529; CHECK-NEXT:    ds_read2_b64 v[2:5], v2 offset1:1
530; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
531; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
532; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
533; CHECK-NEXT:    s_setpc_b64 s[30:31]
534entry:
535  tail call void @llvm.memmove.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
536  ret void
537}
538
; 31-byte memmove (isvolatile = false) from addrspace(3) %src to addrspace(0) %dst, both
; pointers align 2. Expected gfx1030 code: five ds_read* loads (u8 @30, b32 @24, u16 @28,
; b64 @16 and a ds_read2_b64 for the first 16 bytes) are all issued before the four flat stores
; (byte @30, short @28, dwordx3 @16, dwordx4 @0).
539define void @memmove_p0_p3_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
540; CHECK-LABEL: memmove_p0_p3_sz31_align_2_2:
541; CHECK:       ; %bb.0: ; %entry
542; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
543; CHECK-NEXT:    ds_read_u8 v9, v2 offset:30
544; CHECK-NEXT:    ds_read_b32 v8, v2 offset:24
545; CHECK-NEXT:    ds_read_u16 v10, v2 offset:28
546; CHECK-NEXT:    ds_read_b64 v[6:7], v2 offset:16
547; CHECK-NEXT:    ds_read2_b64 v[2:5], v2 offset1:1
548; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
549; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
550; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
551; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
552; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
553; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
554; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
555; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
556; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
557; CHECK-NEXT:    s_setpc_b64 s[30:31]
558entry:
559  tail call void @llvm.memmove.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
560  ret void
561}
562
; 32-byte memmove (isvolatile = false) from addrspace(3) %src to addrspace(0) %dst, both
; pointers align 2. Expected gfx1030 code: two ds_read2_b64 loads are issued before the two
; flat_store_dwordx4 (offset 16 first, then offset 0).
563define void @memmove_p0_p3_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
564; CHECK-LABEL: memmove_p0_p3_sz32_align_2_2:
565; CHECK:       ; %bb.0: ; %entry
566; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
567; CHECK-NEXT:    ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
568; CHECK-NEXT:    ds_read2_b64 v[7:10], v2 offset1:1
569; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
570; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:16
571; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
572; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
573; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
574; CHECK-NEXT:    s_setpc_b64 s[30:31]
575entry:
576  tail call void @llvm.memmove.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
577  ret void
578}
579
; 16-byte memmove (isvolatile = false) from addrspace(3) %src to addrspace(0) %dst, both
; pointers align 8. Expected gfx1030 code: one ds_read2_b64, then one flat_store_dwordx4.
580define void @memmove_p0_p3_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
581; CHECK-LABEL: memmove_p0_p3_sz16_align_8_8:
582; CHECK:       ; %bb.0: ; %entry
583; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
584; CHECK-NEXT:    ds_read2_b64 v[2:5], v2 offset1:1
585; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
586; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
587; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
588; CHECK-NEXT:    s_setpc_b64 s[30:31]
589entry:
590  tail call void @llvm.memmove.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
591  ret void
592}
593
; 31-byte memmove (isvolatile = false) from addrspace(3) %src to addrspace(0) %dst, both
; pointers align 8. Expected gfx1030 code: five ds_read* loads (a ds_read2_b32, b32 @24,
; u8 @30, u16 @28 and a ds_read2_b64 for the first 16 bytes) are all issued before the four
; flat stores (dwordx3 @16, byte @30, short @28, dwordx4 @0).
594define void @memmove_p0_p3_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
595; CHECK-LABEL: memmove_p0_p3_sz31_align_8_8:
596; CHECK:       ; %bb.0: ; %entry
597; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598; CHECK-NEXT:    ds_read2_b32 v[6:7], v2 offset0:4 offset1:5
599; CHECK-NEXT:    ds_read_b32 v8, v2 offset:24
600; CHECK-NEXT:    ds_read_u8 v9, v2 offset:30
601; CHECK-NEXT:    ds_read_u16 v10, v2 offset:28
602; CHECK-NEXT:    ds_read2_b64 v[2:5], v2 offset1:1
603; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
604; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
605; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
606; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
607; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
608; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
609; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
610; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
611; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
612; CHECK-NEXT:    s_setpc_b64 s[30:31]
613entry:
614  tail call void @llvm.memmove.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
615  ret void
616}
617
; 32-byte memmove (isvolatile = false) from addrspace(3) %src to addrspace(0) %dst, both
; pointers align 8. Expected gfx1030 code: two ds_read2_b64 loads are issued before the two
; flat_store_dwordx4 (offset 16 first, then offset 0).
618define void @memmove_p0_p3_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
619; CHECK-LABEL: memmove_p0_p3_sz32_align_8_8:
620; CHECK:       ; %bb.0: ; %entry
621; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
622; CHECK-NEXT:    ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
623; CHECK-NEXT:    ds_read2_b64 v[7:10], v2 offset1:1
624; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
625; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:16
626; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
627; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
628; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
629; CHECK-NEXT:    s_setpc_b64 s[30:31]
630entry:
631  tail call void @llvm.memmove.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
632  ret void
633}
634
; 16-byte memmove (isvolatile = false) from addrspace(3) %src to addrspace(0) %dst, both
; pointers align 16. Expected gfx1030 code: one ds_read_b128 (instead of the ds_read2_b64
; expected at lower alignments), then one flat_store_dwordx4.
635define void @memmove_p0_p3_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
636; CHECK-LABEL: memmove_p0_p3_sz16_align_16_16:
637; CHECK:       ; %bb.0: ; %entry
638; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
639; CHECK-NEXT:    ds_read_b128 v[2:5], v2
640; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
641; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
642; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
643; CHECK-NEXT:    s_setpc_b64 s[30:31]
644entry:
645  tail call void @llvm.memmove.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
646  ret void
647}
648
; memmove of 31 bytes from addrspace(3) to addrspace(0), both pointers align 16.
; Expected code: five DS loads (ds_read2_b32 + ds_read_b32 for the 12 bytes
; stored at offset 16, ds_read_u8 at offset 30, ds_read_u16 at offset 28, and
; ds_read_b128 for the first 16 bytes), all issued before the four flat stores
; (dwordx3 at 16, byte at 30, short at 28, dwordx4 at 0).
649define void @memmove_p0_p3_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
650; CHECK-LABEL: memmove_p0_p3_sz31_align_16_16:
651; CHECK:       ; %bb.0: ; %entry
652; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
653; CHECK-NEXT:    ds_read2_b32 v[6:7], v2 offset0:4 offset1:5
654; CHECK-NEXT:    ds_read_b32 v8, v2 offset:24
655; CHECK-NEXT:    ds_read_u8 v9, v2 offset:30
656; CHECK-NEXT:    ds_read_u16 v10, v2 offset:28
657; CHECK-NEXT:    ds_read_b128 v[2:5], v2
658; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
659; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
660; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
661; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
662; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
663; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
664; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
665; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
666; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
667; CHECK-NEXT:    s_setpc_b64 s[30:31]
668entry:
669  tail call void @llvm.memmove.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
670  ret void
671}
672
; memmove of 32 bytes from addrspace(3) to addrspace(0), both pointers align 16.
; Expected code: two ds_read_b128 loads (offset 16 first, then offset 0), both
; issued before the two matching flat_store_dwordx4 stores.
673define void @memmove_p0_p3_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
674; CHECK-LABEL: memmove_p0_p3_sz32_align_16_16:
675; CHECK:       ; %bb.0: ; %entry
676; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
677; CHECK-NEXT:    ds_read_b128 v[3:6], v2 offset:16
678; CHECK-NEXT:    ds_read_b128 v[7:10], v2
679; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
680; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:16
681; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
682; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
683; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
684; CHECK-NEXT:    s_setpc_b64 s[30:31]
685entry:
686  tail call void @llvm.memmove.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
687  ret void
688}
689
; memmove of 16 bytes from addrspace(4) to addrspace(0), both pointers align 1.
; Expected code: one global_load_dwordx4 followed, after waiting on vmcnt(0),
; by one flat_store_dwordx4; no narrower accesses are expected despite the
; 1-byte alignment.
690define void @memmove_p0_p4_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
691; CHECK-LABEL: memmove_p0_p4_sz16_align_1_1:
692; CHECK:       ; %bb.0: ; %entry
693; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
694; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
695; CHECK-NEXT:    s_waitcnt vmcnt(0)
696; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
697; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
698; CHECK-NEXT:    s_setpc_b64 s[30:31]
699entry:
700  tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
701  ret void
702}
703
; memmove of 31 bytes from addrspace(4) to addrspace(0), both pointers align 1.
; Expected code: one clause of four global loads (ubyte at 30, ushort at 28,
; dwordx3 at 16, dwordx4 at 0), then four flat stores in the same order, each
; preceded by a vmcnt wait that counts down from 3 to 0.
704define void @memmove_p0_p4_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
705; CHECK-LABEL: memmove_p0_p4_sz31_align_1_1:
706; CHECK:       ; %bb.0: ; %entry
707; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
708; CHECK-NEXT:    s_clause 0x3
709; CHECK-NEXT:    global_load_ubyte v9, v[2:3], off offset:30
710; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
711; CHECK-NEXT:    global_load_dwordx3 v[6:8], v[2:3], off offset:16
712; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
713; CHECK-NEXT:    s_waitcnt vmcnt(3)
714; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
715; CHECK-NEXT:    s_waitcnt vmcnt(2)
716; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
717; CHECK-NEXT:    s_waitcnt vmcnt(1)
718; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
719; CHECK-NEXT:    s_waitcnt vmcnt(0)
720; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
721; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
722; CHECK-NEXT:    s_setpc_b64 s[30:31]
723entry:
724  tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
725  ret void
726}
727
; memmove of 32 bytes from addrspace(4) to addrspace(0), both pointers align 1.
; Expected code: one clause of two global_load_dwordx4 loads (offset 16 first),
; then two flat_store_dwordx4 stores in the same order, gated by vmcnt(1) and
; vmcnt(0) respectively.
728define void @memmove_p0_p4_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
729; CHECK-LABEL: memmove_p0_p4_sz32_align_1_1:
730; CHECK:       ; %bb.0: ; %entry
731; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
732; CHECK-NEXT:    s_clause 0x1
733; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
734; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
735; CHECK-NEXT:    s_waitcnt vmcnt(1)
736; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
737; CHECK-NEXT:    s_waitcnt vmcnt(0)
738; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
739; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
740; CHECK-NEXT:    s_setpc_b64 s[30:31]
741entry:
742  tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
743  ret void
744}
745
; memmove of 16 bytes from addrspace(4) to addrspace(0), both pointers align 2.
; Expected code: one global_load_dwordx4 followed, after waiting on vmcnt(0),
; by one flat_store_dwordx4.
746define void @memmove_p0_p4_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
747; CHECK-LABEL: memmove_p0_p4_sz16_align_2_2:
748; CHECK:       ; %bb.0: ; %entry
749; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
750; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
751; CHECK-NEXT:    s_waitcnt vmcnt(0)
752; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
753; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
754; CHECK-NEXT:    s_setpc_b64 s[30:31]
755entry:
756  tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
757  ret void
758}
759
; memmove of 31 bytes from addrspace(4) to addrspace(0), both pointers align 2.
; Expected code: one clause of four global loads (ubyte at 30, ushort at 28,
; dwordx3 at 16, dwordx4 at 0), then four flat stores in the same order, each
; preceded by a vmcnt wait that counts down from 3 to 0.
760define void @memmove_p0_p4_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
761; CHECK-LABEL: memmove_p0_p4_sz31_align_2_2:
762; CHECK:       ; %bb.0: ; %entry
763; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
764; CHECK-NEXT:    s_clause 0x3
765; CHECK-NEXT:    global_load_ubyte v9, v[2:3], off offset:30
766; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
767; CHECK-NEXT:    global_load_dwordx3 v[6:8], v[2:3], off offset:16
768; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
769; CHECK-NEXT:    s_waitcnt vmcnt(3)
770; CHECK-NEXT:    flat_store_byte v[0:1], v9 offset:30
771; CHECK-NEXT:    s_waitcnt vmcnt(2)
772; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
773; CHECK-NEXT:    s_waitcnt vmcnt(1)
774; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[6:8] offset:16
775; CHECK-NEXT:    s_waitcnt vmcnt(0)
776; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
777; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
778; CHECK-NEXT:    s_setpc_b64 s[30:31]
779entry:
780  tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
781  ret void
782}
783
; memmove of 32 bytes from addrspace(4) to addrspace(0), both pointers align 2.
; Expected code: one clause of two global_load_dwordx4 loads (offset 16 first),
; then two flat_store_dwordx4 stores in the same order, gated by vmcnt(1) and
; vmcnt(0) respectively.
784define void @memmove_p0_p4_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
785; CHECK-LABEL: memmove_p0_p4_sz32_align_2_2:
786; CHECK:       ; %bb.0: ; %entry
787; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
788; CHECK-NEXT:    s_clause 0x1
789; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
790; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
791; CHECK-NEXT:    s_waitcnt vmcnt(1)
792; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
793; CHECK-NEXT:    s_waitcnt vmcnt(0)
794; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
795; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
796; CHECK-NEXT:    s_setpc_b64 s[30:31]
797entry:
798  tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
799  ret void
800}
801
; memmove of 16 bytes from addrspace(4) to addrspace(0), both pointers align 8.
; Expected code: one global_load_dwordx4 followed, after waiting on vmcnt(0),
; by one flat_store_dwordx4.
802define void @memmove_p0_p4_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
803; CHECK-LABEL: memmove_p0_p4_sz16_align_8_8:
804; CHECK:       ; %bb.0: ; %entry
805; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
806; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
807; CHECK-NEXT:    s_waitcnt vmcnt(0)
808; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
809; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
810; CHECK-NEXT:    s_setpc_b64 s[30:31]
811entry:
812  tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
813  ret void
814}
815
; memmove of 31 bytes from addrspace(4) to addrspace(0), both pointers align 8.
; Expected code: one clause of five global loads (dword at 16, dwordx4 at 0,
; ushort at 28, dword at 24, ubyte at 30); the dword at 16 is stored, then v8
; is reloaded from offset 20 and the remaining pieces are stored as byte at 30,
; short at 28, dwordx2 at 20 and dwordx4 at 0.
; NOTE(review): unlike the align 1 and align 2 variants, one source load is
; issued after the first store here. Presumably this is acceptable because the
; source is in addrspace(4) -- confirm against the AMDGPU address-space rules.
816define void @memmove_p0_p4_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
817; CHECK-LABEL: memmove_p0_p4_sz31_align_8_8:
818; CHECK:       ; %bb.0: ; %entry
819; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
820; CHECK-NEXT:    s_clause 0x4
821; CHECK-NEXT:    global_load_dword v8, v[2:3], off offset:16
822; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
823; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
824; CHECK-NEXT:    global_load_dword v9, v[2:3], off offset:24
825; CHECK-NEXT:    global_load_ubyte v11, v[2:3], off offset:30
826; CHECK-NEXT:    s_waitcnt vmcnt(4)
827; CHECK-NEXT:    flat_store_dword v[0:1], v8 offset:16
828; CHECK-NEXT:    global_load_dword v8, v[2:3], off offset:20
829; CHECK-NEXT:    s_waitcnt vmcnt(1)
830; CHECK-NEXT:    flat_store_byte v[0:1], v11 offset:30
831; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
832; CHECK-NEXT:    s_waitcnt vmcnt(0)
833; CHECK-NEXT:    flat_store_dwordx2 v[0:1], v[8:9] offset:20
834; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
835; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
836; CHECK-NEXT:    s_setpc_b64 s[30:31]
837entry:
838  tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
839  ret void
840}
841
; memmove of 32 bytes from addrspace(4) to addrspace(0), both pointers align 8.
; Expected code: one clause of two global_load_dwordx4 loads (offset 16 first),
; then two flat_store_dwordx4 stores in the same order, gated by vmcnt(1) and
; vmcnt(0) respectively.
842define void @memmove_p0_p4_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
843; CHECK-LABEL: memmove_p0_p4_sz32_align_8_8:
844; CHECK:       ; %bb.0: ; %entry
845; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
846; CHECK-NEXT:    s_clause 0x1
847; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
848; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
849; CHECK-NEXT:    s_waitcnt vmcnt(1)
850; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
851; CHECK-NEXT:    s_waitcnt vmcnt(0)
852; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
853; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
854; CHECK-NEXT:    s_setpc_b64 s[30:31]
855entry:
856  tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
857  ret void
858}
859
; memmove of 16 bytes from addrspace(4) to addrspace(0), both pointers align 16.
; Expected code: one global_load_dwordx4 followed, after waiting on vmcnt(0),
; by one flat_store_dwordx4.
860define void @memmove_p0_p4_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
861; CHECK-LABEL: memmove_p0_p4_sz16_align_16_16:
862; CHECK:       ; %bb.0: ; %entry
863; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
864; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
865; CHECK-NEXT:    s_waitcnt vmcnt(0)
866; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
867; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
868; CHECK-NEXT:    s_setpc_b64 s[30:31]
869entry:
870  tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
871  ret void
872}
873
; memmove of 31 bytes from addrspace(4) to addrspace(0), both pointers align 16.
; Expected code: one clause of four global loads (ubyte at 30, dword at 16,
; ushort at 28, dwordx4 at 0), after which the dwords at offsets 16, 20 and 24
; are copied one at a time, with the loads for offsets 20 and 24 issued between
; stores; the byte at 30, the short at 28 and the leading dwordx4 are stored
; from the values loaded in the clause.
; NOTE(review): source loads are interleaved with destination stores here.
; Presumably this is acceptable because the source is in addrspace(4) --
; confirm against the AMDGPU address-space rules.
874define void @memmove_p0_p4_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
875; CHECK-LABEL: memmove_p0_p4_sz31_align_16_16:
876; CHECK:       ; %bb.0: ; %entry
877; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
878; CHECK-NEXT:    s_clause 0x3
879; CHECK-NEXT:    global_load_ubyte v8, v[2:3], off offset:30
880; CHECK-NEXT:    global_load_dword v9, v[2:3], off offset:16
881; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
882; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
883; CHECK-NEXT:    s_waitcnt vmcnt(2)
884; CHECK-NEXT:    flat_store_dword v[0:1], v9 offset:16
885; CHECK-NEXT:    global_load_dword v9, v[2:3], off offset:20
886; CHECK-NEXT:    s_waitcnt vmcnt(0)
887; CHECK-NEXT:    flat_store_dword v[0:1], v9 offset:20
888; CHECK-NEXT:    global_load_dword v2, v[2:3], off offset:24
889; CHECK-NEXT:    flat_store_byte v[0:1], v8 offset:30
890; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
891; CHECK-NEXT:    s_waitcnt vmcnt(0)
892; CHECK-NEXT:    flat_store_dword v[0:1], v2 offset:24
893; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
894; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
895; CHECK-NEXT:    s_setpc_b64 s[30:31]
896entry:
897  tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
898  ret void
899}
900
; memmove of 32 bytes from addrspace(4) to addrspace(0), both pointers align 16.
; Expected code: one clause of two global_load_dwordx4 loads (offset 16 first),
; then two flat_store_dwordx4 stores in the same order, gated by vmcnt(1) and
; vmcnt(0) respectively.
901define void @memmove_p0_p4_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
902; CHECK-LABEL: memmove_p0_p4_sz32_align_16_16:
903; CHECK:       ; %bb.0: ; %entry
904; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
905; CHECK-NEXT:    s_clause 0x1
906; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
907; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
908; CHECK-NEXT:    s_waitcnt vmcnt(1)
909; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:16
910; CHECK-NEXT:    s_waitcnt vmcnt(0)
911; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
912; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
913; CHECK-NEXT:    s_setpc_b64 s[30:31]
914entry:
915  tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
916  ret void
917}
918
; memmove of 16 bytes from addrspace(5) to addrspace(0), both pointers align 1.
; Expected code: one clause of four buffer_load_dword loads (offen, offsets
; 0/4/8/12, resource s[0:3]) whose results are written with a single
; flat_store_dwordx4 after waiting on vmcnt(0).
919define void @memmove_p0_p5_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
920; CHECK-LABEL: memmove_p0_p5_sz16_align_1_1:
921; CHECK:       ; %bb.0: ; %entry
922; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
923; CHECK-NEXT:    s_clause 0x3
924; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
925; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
926; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
927; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
928; CHECK-NEXT:    s_waitcnt vmcnt(0)
929; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
930; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
931; CHECK-NEXT:    s_setpc_b64 s[30:31]
932entry:
933  tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
934  ret void
935}
936
; memmove of 31 bytes from addrspace(5) to addrspace(0), both pointers align 1.
; Expected code: one clause of nine buffer loads (dwords at 16/20/24, ushort at
; 28, ubyte at 30, then dwords at 0/4/8/12), followed by flat stores of the
; short at 28 (after vmcnt(5)), the byte at 30 and dwordx3 at 16 (after
; vmcnt(4)), and the dwordx4 at 0 (after vmcnt(0)).
937define void @memmove_p0_p5_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
938; CHECK-LABEL: memmove_p0_p5_sz31_align_1_1:
939; CHECK:       ; %bb.0: ; %entry
940; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
941; CHECK-NEXT:    s_clause 0x8
942; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
943; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
944; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
945; CHECK-NEXT:    buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
946; CHECK-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
947; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
948; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
949; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
950; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
951; CHECK-NEXT:    s_waitcnt vmcnt(5)
952; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
953; CHECK-NEXT:    s_waitcnt vmcnt(4)
954; CHECK-NEXT:    flat_store_byte v[0:1], v11 offset:30
955; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[7:9] offset:16
956; CHECK-NEXT:    s_waitcnt vmcnt(0)
957; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
958; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
959; CHECK-NEXT:    s_setpc_b64 s[30:31]
960entry:
961  tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
962  ret void
963}
964
; memmove of 32 bytes from addrspace(5) to addrspace(0), both pointers align 1.
; Expected code: one clause of eight buffer_load_dword loads (offsets 16..28
; first, then 0..12), followed by flat_store_dwordx4 at offset 16 (after
; vmcnt(4)) and flat_store_dwordx4 at offset 0 (after vmcnt(0)).
965define void @memmove_p0_p5_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
966; CHECK-LABEL: memmove_p0_p5_sz32_align_1_1:
967; CHECK:       ; %bb.0: ; %entry
968; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
969; CHECK-NEXT:    s_clause 0x7
970; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen offset:16
971; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:20
972; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:24
973; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:28
974; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen
975; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:4
976; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:8
977; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:12
978; CHECK-NEXT:    s_waitcnt vmcnt(4)
979; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:16
980; CHECK-NEXT:    s_waitcnt vmcnt(0)
981; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
982; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
983; CHECK-NEXT:    s_setpc_b64 s[30:31]
984entry:
985  tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
986  ret void
987}
988
; memmove of 16 bytes from addrspace(5) to addrspace(0), both pointers align 2.
; Expected code: one clause of four buffer_load_dword loads (offen, offsets
; 0/4/8/12, resource s[0:3]) whose results are written with a single
; flat_store_dwordx4 after waiting on vmcnt(0).
989define void @memmove_p0_p5_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
990; CHECK-LABEL: memmove_p0_p5_sz16_align_2_2:
991; CHECK:       ; %bb.0: ; %entry
992; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
993; CHECK-NEXT:    s_clause 0x3
994; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
995; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
996; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
997; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
998; CHECK-NEXT:    s_waitcnt vmcnt(0)
999; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1000; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1001; CHECK-NEXT:    s_setpc_b64 s[30:31]
1002entry:
1003  tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
1004  ret void
1005}
1006
; memmove of 31 bytes from addrspace(5) to addrspace(0), both pointers align 2.
; Expected code: one clause of nine buffer loads (dwords at 16/20/24, ushort at
; 28, ubyte at 30, then dwords at 0/4/8/12), followed by flat stores of the
; short at 28 (after vmcnt(5)), the byte at 30 and dwordx3 at 16 (after
; vmcnt(4)), and the dwordx4 at 0 (after vmcnt(0)).
1007define void @memmove_p0_p5_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
1008; CHECK-LABEL: memmove_p0_p5_sz31_align_2_2:
1009; CHECK:       ; %bb.0: ; %entry
1010; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011; CHECK-NEXT:    s_clause 0x8
1012; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
1013; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
1014; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
1015; CHECK-NEXT:    buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
1016; CHECK-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
1017; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1018; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1019; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1020; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1021; CHECK-NEXT:    s_waitcnt vmcnt(5)
1022; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
1023; CHECK-NEXT:    s_waitcnt vmcnt(4)
1024; CHECK-NEXT:    flat_store_byte v[0:1], v11 offset:30
1025; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[7:9] offset:16
1026; CHECK-NEXT:    s_waitcnt vmcnt(0)
1027; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1028; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1029; CHECK-NEXT:    s_setpc_b64 s[30:31]
1030entry:
1031  tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
1032  ret void
1033}
1034
; memmove of 32 bytes from addrspace(5) to addrspace(0), both pointers align 2.
; Expected code: one clause of eight buffer_load_dword loads (offsets 16..28
; first, then 0..12), followed by flat_store_dwordx4 at offset 16 (after
; vmcnt(4)) and flat_store_dwordx4 at offset 0 (after vmcnt(0)).
1035define void @memmove_p0_p5_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
1036; CHECK-LABEL: memmove_p0_p5_sz32_align_2_2:
1037; CHECK:       ; %bb.0: ; %entry
1038; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1039; CHECK-NEXT:    s_clause 0x7
1040; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen offset:16
1041; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:20
1042; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:24
1043; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:28
1044; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen
1045; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:4
1046; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:8
1047; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:12
1048; CHECK-NEXT:    s_waitcnt vmcnt(4)
1049; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:16
1050; CHECK-NEXT:    s_waitcnt vmcnt(0)
1051; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
1052; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1053; CHECK-NEXT:    s_setpc_b64 s[30:31]
1054entry:
1055  tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
1056  ret void
1057}
1058
; memmove of 16 bytes from addrspace(5) to addrspace(0), both pointers align 8.
; Expected code: one clause of four buffer_load_dword loads (offen, offsets
; 0/4/8/12, resource s[0:3]) whose results are written with a single
; flat_store_dwordx4 after waiting on vmcnt(0).
1059define void @memmove_p0_p5_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
1060; CHECK-LABEL: memmove_p0_p5_sz16_align_8_8:
1061; CHECK:       ; %bb.0: ; %entry
1062; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1063; CHECK-NEXT:    s_clause 0x3
1064; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1065; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1066; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1067; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1068; CHECK-NEXT:    s_waitcnt vmcnt(0)
1069; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1070; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1071; CHECK-NEXT:    s_setpc_b64 s[30:31]
1072entry:
1073  tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
1074  ret void
1075}
1076
; memmove of 31 bytes from addrspace(5) to addrspace(0), both pointers align 8.
; Expected code: one clause of nine buffer loads (dwords at 16/20/24, ushort at
; 28, ubyte at 30, then dwords at 0/4/8/12), followed by flat stores of the
; dwordx3 at 16 (after vmcnt(6)), the byte at 30 and short at 28 (after
; vmcnt(4)), and the dwordx4 at 0 (after vmcnt(0)). The store order differs
; from the align 1 and align 2 variants of this test.
1077define void @memmove_p0_p5_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
1078; CHECK-LABEL: memmove_p0_p5_sz31_align_8_8:
1079; CHECK:       ; %bb.0: ; %entry
1080; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1081; CHECK-NEXT:    s_clause 0x8
1082; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
1083; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
1084; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
1085; CHECK-NEXT:    buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
1086; CHECK-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
1087; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1088; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1089; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1090; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1091; CHECK-NEXT:    s_waitcnt vmcnt(6)
1092; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[7:9] offset:16
1093; CHECK-NEXT:    s_waitcnt vmcnt(4)
1094; CHECK-NEXT:    flat_store_byte v[0:1], v11 offset:30
1095; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
1096; CHECK-NEXT:    s_waitcnt vmcnt(0)
1097; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1098; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1099; CHECK-NEXT:    s_setpc_b64 s[30:31]
1100entry:
1101  tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
1102  ret void
1103}
1104
; memmove of 32 bytes from addrspace(5) to addrspace(0), both pointers align 8.
; Expected code: one clause of eight buffer_load_dword loads at ascending
; offsets 0..28, followed by flat_store_dwordx4 at offset 0 (after vmcnt(4))
; and flat_store_dwordx4 at offset 16 (after vmcnt(0)). This is the reverse of
; the half ordering seen in the align 1 and align 2 variants of this test.
1105define void @memmove_p0_p5_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
1106; CHECK-LABEL: memmove_p0_p5_sz32_align_8_8:
1107; CHECK:       ; %bb.0: ; %entry
1108; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1109; CHECK-NEXT:    s_clause 0x7
1110; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1111; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1112; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1113; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1114; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
1115; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
1116; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
1117; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
1118; CHECK-NEXT:    s_waitcnt vmcnt(4)
1119; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1120; CHECK-NEXT:    s_waitcnt vmcnt(0)
1121; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10] offset:16
1122; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1123; CHECK-NEXT:    s_setpc_b64 s[30:31]
1124entry:
1125  tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
1126  ret void
1127}
1128
; memmove of 16 bytes from addrspace(5) to addrspace(0), both pointers align 16.
; Expected code: one clause of four buffer_load_dword loads (offen, offsets
; 0/4/8/12, resource s[0:3]) whose results are written with a single
; flat_store_dwordx4 after waiting on vmcnt(0).
1129define void @memmove_p0_p5_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
1130; CHECK-LABEL: memmove_p0_p5_sz16_align_16_16:
1131; CHECK:       ; %bb.0: ; %entry
1132; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1133; CHECK-NEXT:    s_clause 0x3
1134; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1135; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1136; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1137; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1138; CHECK-NEXT:    s_waitcnt vmcnt(0)
1139; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1140; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1141; CHECK-NEXT:    s_setpc_b64 s[30:31]
1142entry:
1143  tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
1144  ret void
1145}
1146
; memmove of 31 bytes from addrspace(5) to addrspace(0), both pointers align 16.
; Expected code: one clause of nine buffer loads (dwords at 16/20/24, ushort at
; 28, ubyte at 30, then dwords at 0/4/8/12), followed by flat stores of the
; dwordx3 at 16 (after vmcnt(6)), the byte at 30 and short at 28 (after
; vmcnt(4)), and the dwordx4 at 0 (after vmcnt(0)).
1147define void @memmove_p0_p5_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
1148; CHECK-LABEL: memmove_p0_p5_sz31_align_16_16:
1149; CHECK:       ; %bb.0: ; %entry
1150; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1151; CHECK-NEXT:    s_clause 0x8
1152; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
1153; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
1154; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
1155; CHECK-NEXT:    buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
1156; CHECK-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
1157; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1158; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1159; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1160; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1161; CHECK-NEXT:    s_waitcnt vmcnt(6)
1162; CHECK-NEXT:    flat_store_dwordx3 v[0:1], v[7:9] offset:16
1163; CHECK-NEXT:    s_waitcnt vmcnt(4)
1164; CHECK-NEXT:    flat_store_byte v[0:1], v11 offset:30
1165; CHECK-NEXT:    flat_store_short v[0:1], v10 offset:28
1166; CHECK-NEXT:    s_waitcnt vmcnt(0)
1167; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1168; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1169; CHECK-NEXT:    s_setpc_b64 s[30:31]
1170entry:
1171  tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
1172  ret void
1173}
1174
; memmove of 32 bytes from addrspace(5) to addrspace(0), both pointers align 16.
; Expected code: one clause of eight buffer_load_dword loads at ascending
; offsets 0..28, followed by flat_store_dwordx4 at offset 0 (after vmcnt(4))
; and flat_store_dwordx4 at offset 16 (after vmcnt(0)).
1175define void @memmove_p0_p5_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
1176; CHECK-LABEL: memmove_p0_p5_sz32_align_16_16:
1177; CHECK:       ; %bb.0: ; %entry
1178; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1179; CHECK-NEXT:    s_clause 0x7
1180; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
1181; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
1182; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
1183; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
1184; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
1185; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
1186; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
1187; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
1188; CHECK-NEXT:    s_waitcnt vmcnt(4)
1189; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[3:6]
1190; CHECK-NEXT:    s_waitcnt vmcnt(0)
1191; CHECK-NEXT:    flat_store_dwordx4 v[0:1], v[7:10] offset:16
1192; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1193; CHECK-NEXT:    s_setpc_b64 s[30:31]
1194entry:
1195  tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
1196  ret void
1197}
1198
; memmove of 16 bytes from addrspace(0) to addrspace(1), both pointers align 1.
; Expected code: one flat_load_dwordx4, a wait on both vmcnt(0) and
; lgkmcnt(0), then one global_store_dwordx4. No further wait is expected
; between the global store and the return.
1199define void @memmove_p1_p0_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
1200; CHECK-LABEL: memmove_p1_p0_sz16_align_1_1:
1201; CHECK:       ; %bb.0: ; %entry
1202; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1203; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
1204; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1205; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1206; CHECK-NEXT:    s_setpc_b64 s[30:31]
1207entry:
1208  tail call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
1209  ret void
1210}
1211
; memmove of 31 bytes from addrspace(0) to addrspace(1), both pointers align 1.
; Expected code: one clause of four flat loads (ubyte at 30, ushort at 28,
; dwordx3 at 16, dwordx4 at 0), then four global stores in the same order, each
; preceded by a combined vmcnt/lgkmcnt wait that counts down from 3 to 0.
1212define void @memmove_p1_p0_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
1213; CHECK-LABEL: memmove_p1_p0_sz31_align_1_1:
1214; CHECK:       ; %bb.0: ; %entry
1215; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1216; CHECK-NEXT:    s_clause 0x3
1217; CHECK-NEXT:    flat_load_ubyte v9, v[2:3] offset:30
1218; CHECK-NEXT:    flat_load_ushort v10, v[2:3] offset:28
1219; CHECK-NEXT:    flat_load_dwordx3 v[6:8], v[2:3] offset:16
1220; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
1221; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
1222; CHECK-NEXT:    global_store_byte v[0:1], v9, off offset:30
1223; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
1224; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
1225; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1226; CHECK-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off offset:16
1227; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1228; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1229; CHECK-NEXT:    s_setpc_b64 s[30:31]
1230entry:
1231  tail call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
1232  ret void
1233}
1234
; memmove of 32 bytes from addrspace(0) to addrspace(1), both pointers align 1.
; Expected code: one clause of two flat_load_dwordx4 loads (offset 16 first),
; then two global_store_dwordx4 stores in the same order, gated by combined
; vmcnt/lgkmcnt waits of 1 and 0 respectively.
1235define void @memmove_p1_p0_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
1236; CHECK-LABEL: memmove_p1_p0_sz32_align_1_1:
1237; CHECK:       ; %bb.0: ; %entry
1238; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1239; CHECK-NEXT:    s_clause 0x1
1240; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
1241; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
1242; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1243; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1244; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1245; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1246; CHECK-NEXT:    s_setpc_b64 s[30:31]
1247entry:
1248  tail call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
1249  ret void
1250}
1251
; memmove of 16 bytes from addrspace(0) %src to addrspace(1) %dst, both align 2.
; Expected codegen: one flat_load_dwordx4 and one global_store_dwordx4.
1252define void @memmove_p1_p0_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
1253; CHECK-LABEL: memmove_p1_p0_sz16_align_2_2:
1254; CHECK:       ; %bb.0: ; %entry
1255; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1256; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
1257; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1258; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1259; CHECK-NEXT:    s_setpc_b64 s[30:31]
1260entry:
1261  tail call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
1262  ret void
1263}
1264
; memmove of 31 bytes from addrspace(0) %src to addrspace(1) %dst, both align 2.
; Expected codegen: ubyte, ushort, dwordx3 and dwordx4 flat loads under
; s_clause 0x3, then the matching global stores, each behind its own s_waitcnt.
1265define void @memmove_p1_p0_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
1266; CHECK-LABEL: memmove_p1_p0_sz31_align_2_2:
1267; CHECK:       ; %bb.0: ; %entry
1268; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1269; CHECK-NEXT:    s_clause 0x3
1270; CHECK-NEXT:    flat_load_ubyte v9, v[2:3] offset:30
1271; CHECK-NEXT:    flat_load_ushort v10, v[2:3] offset:28
1272; CHECK-NEXT:    flat_load_dwordx3 v[6:8], v[2:3] offset:16
1273; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
1274; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
1275; CHECK-NEXT:    global_store_byte v[0:1], v9, off offset:30
1276; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
1277; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
1278; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1279; CHECK-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off offset:16
1280; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1281; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1282; CHECK-NEXT:    s_setpc_b64 s[30:31]
1283entry:
1284  tail call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
1285  ret void
1286}
1287
; memmove of 32 bytes from addrspace(0) %src to addrspace(1) %dst, both align 2.
; Expected codegen: two flat_load_dwordx4 under s_clause 0x1, then two
; global_store_dwordx4, with the offset:16 half stored first.
1288define void @memmove_p1_p0_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
1289; CHECK-LABEL: memmove_p1_p0_sz32_align_2_2:
1290; CHECK:       ; %bb.0: ; %entry
1291; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1292; CHECK-NEXT:    s_clause 0x1
1293; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
1294; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
1295; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1296; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1297; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1298; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1299; CHECK-NEXT:    s_setpc_b64 s[30:31]
1300entry:
1301  tail call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
1302  ret void
1303}
1304
; memmove of 16 bytes from addrspace(0) %src to addrspace(1) %dst, both align 8.
; Expected codegen: one flat_load_dwordx4 and one global_store_dwordx4.
1305define void @memmove_p1_p0_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
1306; CHECK-LABEL: memmove_p1_p0_sz16_align_8_8:
1307; CHECK:       ; %bb.0: ; %entry
1308; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1309; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
1310; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1311; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1312; CHECK-NEXT:    s_setpc_b64 s[30:31]
1313entry:
1314  tail call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
1315  ret void
1316}
1317
; memmove of 31 bytes from addrspace(0) %src to addrspace(1) %dst, both align 8.
; Expected codegen: five flat loads under s_clause 0x4. The dword at offset 20
; is loaded only after the dword at offset 16 has been stored, and is then
; written together with the offset 24 dword as a single dwordx2 store.
1318define void @memmove_p1_p0_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
1319; CHECK-LABEL: memmove_p1_p0_sz31_align_8_8:
1320; CHECK:       ; %bb.0: ; %entry
1321; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1322; CHECK-NEXT:    s_clause 0x4
1323; CHECK-NEXT:    flat_load_dword v8, v[2:3] offset:16
1324; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3]
1325; CHECK-NEXT:    flat_load_ushort v10, v[2:3] offset:28
1326; CHECK-NEXT:    flat_load_dword v9, v[2:3] offset:24
1327; CHECK-NEXT:    flat_load_ubyte v11, v[2:3] offset:30
1328; CHECK-NEXT:    s_waitcnt vmcnt(4) lgkmcnt(4)
1329; CHECK-NEXT:    global_store_dword v[0:1], v8, off offset:16
1330; CHECK-NEXT:    flat_load_dword v8, v[2:3] offset:20
1331; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1332; CHECK-NEXT:    global_store_byte v[0:1], v11, off offset:30
1333; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
1334; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1335; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[8:9], off offset:20
1336; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1337; CHECK-NEXT:    s_setpc_b64 s[30:31]
1338entry:
1339  tail call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
1340  ret void
1341}
1342
; memmove of 32 bytes from addrspace(0) %src to addrspace(1) %dst, both align 8.
; Expected codegen: two flat_load_dwordx4 under s_clause 0x1, then two
; global_store_dwordx4, with the offset:16 half stored first.
1343define void @memmove_p1_p0_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
1344; CHECK-LABEL: memmove_p1_p0_sz32_align_8_8:
1345; CHECK:       ; %bb.0: ; %entry
1346; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1347; CHECK-NEXT:    s_clause 0x1
1348; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
1349; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
1350; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1351; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1352; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1353; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1354; CHECK-NEXT:    s_setpc_b64 s[30:31]
1355entry:
1356  tail call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
1357  ret void
1358}
1359
; memmove of 16 bytes from addrspace(0) %src to addrspace(1) %dst, both align 16.
; Expected codegen: one flat_load_dwordx4 and one global_store_dwordx4.
1360define void @memmove_p1_p0_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
1361; CHECK-LABEL: memmove_p1_p0_sz16_align_16_16:
1362; CHECK:       ; %bb.0: ; %entry
1363; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1364; CHECK-NEXT:    flat_load_dwordx4 v[2:5], v[2:3]
1365; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1366; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1367; CHECK-NEXT:    s_setpc_b64 s[30:31]
1368entry:
1369  tail call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
1370  ret void
1371}
1372
; memmove of 31 bytes from addrspace(0) %src to addrspace(1) %dst, both align 16.
; Expected codegen: four flat loads under s_clause 0x3. The dwords at offsets
; 20 and 24 are each loaded only after the store of the preceding dword
; (offset 16, then offset 20) has been issued.
1373define void @memmove_p1_p0_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
1374; CHECK-LABEL: memmove_p1_p0_sz31_align_16_16:
1375; CHECK:       ; %bb.0: ; %entry
1376; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1377; CHECK-NEXT:    s_clause 0x3
1378; CHECK-NEXT:    flat_load_ubyte v8, v[2:3] offset:30
1379; CHECK-NEXT:    flat_load_dword v9, v[2:3] offset:16
1380; CHECK-NEXT:    flat_load_ushort v10, v[2:3] offset:28
1381; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3]
1382; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
1383; CHECK-NEXT:    global_store_dword v[0:1], v9, off offset:16
1384; CHECK-NEXT:    flat_load_dword v9, v[2:3] offset:20
1385; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1386; CHECK-NEXT:    global_store_dword v[0:1], v9, off offset:20
1387; CHECK-NEXT:    flat_load_dword v2, v[2:3] offset:24
1388; CHECK-NEXT:    global_store_byte v[0:1], v8, off offset:30
1389; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
1390; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1391; CHECK-NEXT:    global_store_dword v[0:1], v2, off offset:24
1392; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1393; CHECK-NEXT:    s_setpc_b64 s[30:31]
1394entry:
1395  tail call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
1396  ret void
1397}
1398
; memmove of 32 bytes from addrspace(0) %src to addrspace(1) %dst, both align 16.
; Expected codegen: two flat_load_dwordx4 under s_clause 0x1, then two
; global_store_dwordx4, with the offset:16 half stored first.
1399define void @memmove_p1_p0_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
1400; CHECK-LABEL: memmove_p1_p0_sz32_align_16_16:
1401; CHECK:       ; %bb.0: ; %entry
1402; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1403; CHECK-NEXT:    s_clause 0x1
1404; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:16
1405; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[2:3]
1406; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
1407; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1408; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1409; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1410; CHECK-NEXT:    s_setpc_b64 s[30:31]
1411entry:
1412  tail call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
1413  ret void
1414}
1415
; memmove of 16 bytes from addrspace(1) %src to addrspace(1) %dst, both align 1.
; Expected codegen: one global_load_dwordx4 and one global_store_dwordx4.
1416define void @memmove_p1_p1_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
1417; CHECK-LABEL: memmove_p1_p1_sz16_align_1_1:
1418; CHECK:       ; %bb.0: ; %entry
1419; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1420; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1421; CHECK-NEXT:    s_waitcnt vmcnt(0)
1422; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1423; CHECK-NEXT:    s_setpc_b64 s[30:31]
1424entry:
1425  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
1426  ret void
1427}
1428
; memmove of 31 bytes from addrspace(1) %src to addrspace(1) %dst, both align 1.
; Expected codegen: ubyte, ushort, dwordx3 and dwordx4 global loads under
; s_clause 0x3, then the matching global stores, each behind its own
; s_waitcnt vmcnt.
1429define void @memmove_p1_p1_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
1430; CHECK-LABEL: memmove_p1_p1_sz31_align_1_1:
1431; CHECK:       ; %bb.0: ; %entry
1432; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1433; CHECK-NEXT:    s_clause 0x3
1434; CHECK-NEXT:    global_load_ubyte v9, v[2:3], off offset:30
1435; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
1436; CHECK-NEXT:    global_load_dwordx3 v[6:8], v[2:3], off offset:16
1437; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1438; CHECK-NEXT:    s_waitcnt vmcnt(3)
1439; CHECK-NEXT:    global_store_byte v[0:1], v9, off offset:30
1440; CHECK-NEXT:    s_waitcnt vmcnt(2)
1441; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
1442; CHECK-NEXT:    s_waitcnt vmcnt(1)
1443; CHECK-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off offset:16
1444; CHECK-NEXT:    s_waitcnt vmcnt(0)
1445; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1446; CHECK-NEXT:    s_setpc_b64 s[30:31]
1447entry:
1448  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
1449  ret void
1450}
1451
; memmove of 32 bytes from addrspace(1) %src to addrspace(1) %dst, both align 1.
; Expected codegen: two global_load_dwordx4 under s_clause 0x1, then two
; global_store_dwordx4, with the offset:16 half stored first.
1452define void @memmove_p1_p1_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
1453; CHECK-LABEL: memmove_p1_p1_sz32_align_1_1:
1454; CHECK:       ; %bb.0: ; %entry
1455; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1456; CHECK-NEXT:    s_clause 0x1
1457; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
1458; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
1459; CHECK-NEXT:    s_waitcnt vmcnt(1)
1460; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1461; CHECK-NEXT:    s_waitcnt vmcnt(0)
1462; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1463; CHECK-NEXT:    s_setpc_b64 s[30:31]
1464entry:
1465  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
1466  ret void
1467}
1468
; memmove of 16 bytes from addrspace(1) %src to addrspace(1) %dst, both align 2.
; Expected codegen: one global_load_dwordx4 and one global_store_dwordx4.
1469define void @memmove_p1_p1_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
1470; CHECK-LABEL: memmove_p1_p1_sz16_align_2_2:
1471; CHECK:       ; %bb.0: ; %entry
1472; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1473; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1474; CHECK-NEXT:    s_waitcnt vmcnt(0)
1475; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1476; CHECK-NEXT:    s_setpc_b64 s[30:31]
1477entry:
1478  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
1479  ret void
1480}
1481
; memmove of 31 bytes from addrspace(1) %src to addrspace(1) %dst, both align 2.
; Expected codegen: ubyte, ushort, dwordx3 and dwordx4 global loads under
; s_clause 0x3, then the matching global stores, each behind its own
; s_waitcnt vmcnt.
1482define void @memmove_p1_p1_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
1483; CHECK-LABEL: memmove_p1_p1_sz31_align_2_2:
1484; CHECK:       ; %bb.0: ; %entry
1485; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1486; CHECK-NEXT:    s_clause 0x3
1487; CHECK-NEXT:    global_load_ubyte v9, v[2:3], off offset:30
1488; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
1489; CHECK-NEXT:    global_load_dwordx3 v[6:8], v[2:3], off offset:16
1490; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1491; CHECK-NEXT:    s_waitcnt vmcnt(3)
1492; CHECK-NEXT:    global_store_byte v[0:1], v9, off offset:30
1493; CHECK-NEXT:    s_waitcnt vmcnt(2)
1494; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
1495; CHECK-NEXT:    s_waitcnt vmcnt(1)
1496; CHECK-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off offset:16
1497; CHECK-NEXT:    s_waitcnt vmcnt(0)
1498; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1499; CHECK-NEXT:    s_setpc_b64 s[30:31]
1500entry:
1501  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
1502  ret void
1503}
1504
; memmove of 32 bytes from addrspace(1) %src to addrspace(1) %dst, both align 2.
; Expected codegen: two global_load_dwordx4 under s_clause 0x1, then two
; global_store_dwordx4, with the offset:16 half stored first.
1505define void @memmove_p1_p1_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
1506; CHECK-LABEL: memmove_p1_p1_sz32_align_2_2:
1507; CHECK:       ; %bb.0: ; %entry
1508; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1509; CHECK-NEXT:    s_clause 0x1
1510; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
1511; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
1512; CHECK-NEXT:    s_waitcnt vmcnt(1)
1513; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1514; CHECK-NEXT:    s_waitcnt vmcnt(0)
1515; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1516; CHECK-NEXT:    s_setpc_b64 s[30:31]
1517entry:
1518  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
1519  ret void
1520}
1521
; memmove of 16 bytes from addrspace(1) %src to addrspace(1) %dst, both align 8.
; Expected codegen: one global_load_dwordx4 and one global_store_dwordx4.
1522define void @memmove_p1_p1_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
1523; CHECK-LABEL: memmove_p1_p1_sz16_align_8_8:
1524; CHECK:       ; %bb.0: ; %entry
1525; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1526; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1527; CHECK-NEXT:    s_waitcnt vmcnt(0)
1528; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1529; CHECK-NEXT:    s_setpc_b64 s[30:31]
1530entry:
1531  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
1532  ret void
1533}
1534
; memmove of 31 bytes from addrspace(1) %src to addrspace(1) %dst, both align 8.
; Expected codegen: five global loads under s_clause 0x4. The dword at offset
; 20 is loaded only after the dword at offset 16 has been stored, and is then
; written together with the offset 24 dword as a single dwordx2 store.
1535define void @memmove_p1_p1_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
1536; CHECK-LABEL: memmove_p1_p1_sz31_align_8_8:
1537; CHECK:       ; %bb.0: ; %entry
1538; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1539; CHECK-NEXT:    s_clause 0x4
1540; CHECK-NEXT:    global_load_dword v8, v[2:3], off offset:16
1541; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
1542; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
1543; CHECK-NEXT:    global_load_dword v9, v[2:3], off offset:24
1544; CHECK-NEXT:    global_load_ubyte v11, v[2:3], off offset:30
1545; CHECK-NEXT:    s_waitcnt vmcnt(4)
1546; CHECK-NEXT:    global_store_dword v[0:1], v8, off offset:16
1547; CHECK-NEXT:    global_load_dword v8, v[2:3], off offset:20
1548; CHECK-NEXT:    s_waitcnt vmcnt(1)
1549; CHECK-NEXT:    global_store_byte v[0:1], v11, off offset:30
1550; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
1551; CHECK-NEXT:    s_waitcnt vmcnt(0)
1552; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[8:9], off offset:20
1553; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1554; CHECK-NEXT:    s_setpc_b64 s[30:31]
1555entry:
1556  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
1557  ret void
1558}
1559
; memmove of 32 bytes from addrspace(1) %src to addrspace(1) %dst, both align 8.
; Expected codegen: two global_load_dwordx4 under s_clause 0x1, then two
; global_store_dwordx4, with the offset:16 half stored first.
1560define void @memmove_p1_p1_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
1561; CHECK-LABEL: memmove_p1_p1_sz32_align_8_8:
1562; CHECK:       ; %bb.0: ; %entry
1563; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1564; CHECK-NEXT:    s_clause 0x1
1565; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
1566; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
1567; CHECK-NEXT:    s_waitcnt vmcnt(1)
1568; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1569; CHECK-NEXT:    s_waitcnt vmcnt(0)
1570; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1571; CHECK-NEXT:    s_setpc_b64 s[30:31]
1572entry:
1573  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
1574  ret void
1575}
1576
; memmove of 16 bytes from addrspace(1) %src to addrspace(1) %dst, both align 16.
; Expected codegen: one global_load_dwordx4 and one global_store_dwordx4.
1577define void @memmove_p1_p1_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
1578; CHECK-LABEL: memmove_p1_p1_sz16_align_16_16:
1579; CHECK:       ; %bb.0: ; %entry
1580; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1581; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1582; CHECK-NEXT:    s_waitcnt vmcnt(0)
1583; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1584; CHECK-NEXT:    s_setpc_b64 s[30:31]
1585entry:
1586  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
1587  ret void
1588}
1589
; memmove of 31 bytes from addrspace(1) %src to addrspace(1) %dst, both align 16.
; Expected codegen: four global loads under s_clause 0x3. The dwords at offsets
; 20 and 24 are each loaded only after the store of the preceding dword
; (offset 16, then offset 20) has been issued.
1590define void @memmove_p1_p1_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
1591; CHECK-LABEL: memmove_p1_p1_sz31_align_16_16:
1592; CHECK:       ; %bb.0: ; %entry
1593; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1594; CHECK-NEXT:    s_clause 0x3
1595; CHECK-NEXT:    global_load_ubyte v8, v[2:3], off offset:30
1596; CHECK-NEXT:    global_load_dword v9, v[2:3], off offset:16
1597; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
1598; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
1599; CHECK-NEXT:    s_waitcnt vmcnt(2)
1600; CHECK-NEXT:    global_store_dword v[0:1], v9, off offset:16
1601; CHECK-NEXT:    global_load_dword v9, v[2:3], off offset:20
1602; CHECK-NEXT:    s_waitcnt vmcnt(0)
1603; CHECK-NEXT:    global_store_dword v[0:1], v9, off offset:20
1604; CHECK-NEXT:    global_load_dword v2, v[2:3], off offset:24
1605; CHECK-NEXT:    global_store_byte v[0:1], v8, off offset:30
1606; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
1607; CHECK-NEXT:    s_waitcnt vmcnt(0)
1608; CHECK-NEXT:    global_store_dword v[0:1], v2, off offset:24
1609; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1610; CHECK-NEXT:    s_setpc_b64 s[30:31]
1611entry:
1612  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
1613  ret void
1614}
1615
; memmove of 32 bytes from addrspace(1) %src to addrspace(1) %dst, both align 16.
; Expected codegen: two global_load_dwordx4 under s_clause 0x1, then two
; global_store_dwordx4, with the offset:16 half stored first.
1616define void @memmove_p1_p1_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
1617; CHECK-LABEL: memmove_p1_p1_sz32_align_16_16:
1618; CHECK:       ; %bb.0: ; %entry
1619; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1620; CHECK-NEXT:    s_clause 0x1
1621; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
1622; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
1623; CHECK-NEXT:    s_waitcnt vmcnt(1)
1624; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1625; CHECK-NEXT:    s_waitcnt vmcnt(0)
1626; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1627; CHECK-NEXT:    s_setpc_b64 s[30:31]
1628entry:
1629  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
1630  ret void
1631}
1632
; memmove of 16 bytes from addrspace(3) %src to addrspace(1) %dst, both align 1.
; Expected codegen: one ds_read_b128 and one global_store_dwordx4.
1633define void @memmove_p1_p3_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
1634; CHECK-LABEL: memmove_p1_p3_sz16_align_1_1:
1635; CHECK:       ; %bb.0: ; %entry
1636; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1637; CHECK-NEXT:    ds_read_b128 v[2:5], v2
1638; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1639; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1640; CHECK-NEXT:    s_setpc_b64 s[30:31]
1641entry:
1642  tail call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
1643  ret void
1644}
1645
; memmove of 31 bytes from addrspace(3) %src to addrspace(1) %dst, both align 1.
; Expected codegen: five ds reads (b64, b128 at offset 8, b32 at offset 24,
; u16 at offset 28, u8 at offset 30), then the matching global stores in
; ascending offset order, each behind its own s_waitcnt lgkmcnt.
1646define void @memmove_p1_p3_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
1647; CHECK-LABEL: memmove_p1_p3_sz31_align_1_1:
1648; CHECK:       ; %bb.0: ; %entry
1649; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1650; CHECK-NEXT:    ds_read_b64 v[7:8], v2
1651; CHECK-NEXT:    ds_read_b128 v[3:6], v2 offset:8
1652; CHECK-NEXT:    ds_read_b32 v9, v2 offset:24
1653; CHECK-NEXT:    ds_read_u16 v10, v2 offset:28
1654; CHECK-NEXT:    ds_read_u8 v2, v2 offset:30
1655; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
1656; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[7:8], off
1657; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
1658; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:8
1659; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
1660; CHECK-NEXT:    global_store_dword v[0:1], v9, off offset:24
1661; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1662; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
1663; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1664; CHECK-NEXT:    global_store_byte v[0:1], v2, off offset:30
1665; CHECK-NEXT:    s_setpc_b64 s[30:31]
1666entry:
1667  tail call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
1668  ret void
1669}
1670
; memmove of 32 bytes from addrspace(3) %src to addrspace(1) %dst, both align 1.
; Expected codegen: two ds_read_b128 (offsets 0 and 16), then two
; global_store_dwordx4 in the same order, each behind its own s_waitcnt lgkmcnt.
1671define void @memmove_p1_p3_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
1672; CHECK-LABEL: memmove_p1_p3_sz32_align_1_1:
1673; CHECK:       ; %bb.0: ; %entry
1674; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1675; CHECK-NEXT:    ds_read_b128 v[3:6], v2
1676; CHECK-NEXT:    ds_read_b128 v[7:10], v2 offset:16
1677; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1678; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1679; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1680; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
1681; CHECK-NEXT:    s_setpc_b64 s[30:31]
1682entry:
1683  tail call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
1684  ret void
1685}
1686
; memmove of 16 bytes from addrspace(3) %src to addrspace(1) %dst, both align 2.
; Expected codegen: one ds_read_b128 and one global_store_dwordx4.
1687define void @memmove_p1_p3_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
1688; CHECK-LABEL: memmove_p1_p3_sz16_align_2_2:
1689; CHECK:       ; %bb.0: ; %entry
1690; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1691; CHECK-NEXT:    ds_read_b128 v[2:5], v2
1692; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1693; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1694; CHECK-NEXT:    s_setpc_b64 s[30:31]
1695entry:
1696  tail call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
1697  ret void
1698}
1699
; memmove of 31 bytes from addrspace(3) %src to addrspace(1) %dst, both align 2.
; Expected codegen: five ds reads (b64, b128 at offset 8, b32 at offset 24,
; u16 at offset 28, u8 at offset 30), then the matching global stores in
; ascending offset order, each behind its own s_waitcnt lgkmcnt.
1700define void @memmove_p1_p3_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
1701; CHECK-LABEL: memmove_p1_p3_sz31_align_2_2:
1702; CHECK:       ; %bb.0: ; %entry
1703; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1704; CHECK-NEXT:    ds_read_b64 v[7:8], v2
1705; CHECK-NEXT:    ds_read_b128 v[3:6], v2 offset:8
1706; CHECK-NEXT:    ds_read_b32 v9, v2 offset:24
1707; CHECK-NEXT:    ds_read_u16 v10, v2 offset:28
1708; CHECK-NEXT:    ds_read_u8 v2, v2 offset:30
1709; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
1710; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[7:8], off
1711; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
1712; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off offset:8
1713; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
1714; CHECK-NEXT:    global_store_dword v[0:1], v9, off offset:24
1715; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1716; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
1717; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1718; CHECK-NEXT:    global_store_byte v[0:1], v2, off offset:30
1719; CHECK-NEXT:    s_setpc_b64 s[30:31]
1720entry:
1721  tail call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
1722  ret void
1723}
1724
; memmove of 32 bytes from addrspace(3) %src to addrspace(1) %dst, both align 2.
; Expected codegen: two ds_read_b128 (offsets 0 and 16), then two
; global_store_dwordx4 in the same order, each behind its own s_waitcnt lgkmcnt.
1725define void @memmove_p1_p3_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
1726; CHECK-LABEL: memmove_p1_p3_sz32_align_2_2:
1727; CHECK:       ; %bb.0: ; %entry
1728; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1729; CHECK-NEXT:    ds_read_b128 v[3:6], v2
1730; CHECK-NEXT:    ds_read_b128 v[7:10], v2 offset:16
1731; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1732; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1733; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1734; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
1735; CHECK-NEXT:    s_setpc_b64 s[30:31]
1736entry:
1737  tail call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
1738  ret void
1739}
1740
; memmove of 16 bytes from addrspace(3) %src to addrspace(1) %dst, both align 8.
; Expected codegen: one ds_read2_b64 (offset1:1) and one global_store_dwordx4.
1741define void @memmove_p1_p3_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
1742; CHECK-LABEL: memmove_p1_p3_sz16_align_8_8:
1743; CHECK:       ; %bb.0: ; %entry
1744; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1745; CHECK-NEXT:    ds_read2_b64 v[2:5], v2 offset1:1
1746; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1747; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1748; CHECK-NEXT:    s_setpc_b64 s[30:31]
1749entry:
1750  tail call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
1751  ret void
1752}
1753
; memmove of 31 bytes from addrspace(3) %src to addrspace(1) %dst, both align 8.
; Expected codegen: ds_read2_b64 for the first 16 bytes, ds_read_b32 plus
; ds_read2_b32 for v[7:9] (stored as one dwordx3 at offset 16), and u16/u8
; reads for the last three bytes. All reads precede the first store.
1754define void @memmove_p1_p3_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
1755; CHECK-LABEL: memmove_p1_p3_sz31_align_8_8:
1756; CHECK:       ; %bb.0: ; %entry
1757; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1758; CHECK-NEXT:    ds_read2_b64 v[3:6], v2 offset1:1
1759; CHECK-NEXT:    ds_read_b32 v7, v2 offset:16
1760; CHECK-NEXT:    ds_read_u8 v10, v2 offset:30
1761; CHECK-NEXT:    ds_read2_b32 v[8:9], v2 offset0:5 offset1:6
1762; CHECK-NEXT:    ds_read_u16 v2, v2 offset:28
1763; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
1764; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1765; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1766; CHECK-NEXT:    global_store_dwordx3 v[0:1], v[7:9], off offset:16
1767; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1768; CHECK-NEXT:    global_store_short v[0:1], v2, off offset:28
1769; CHECK-NEXT:    global_store_byte v[0:1], v10, off offset:30
1770; CHECK-NEXT:    s_setpc_b64 s[30:31]
1771entry:
1772  tail call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
1773  ret void
1774}
1775
; memmove of 32 bytes from addrspace(3) %src to addrspace(1) %dst, both align 8.
; Expected codegen: two ds_read2_b64 (offset1:1, then offset0:2 offset1:3),
; then two global_store_dwordx4 in the same order.
1776define void @memmove_p1_p3_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
1777; CHECK-LABEL: memmove_p1_p3_sz32_align_8_8:
1778; CHECK:       ; %bb.0: ; %entry
1779; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1780; CHECK-NEXT:    ds_read2_b64 v[3:6], v2 offset1:1
1781; CHECK-NEXT:    ds_read2_b64 v[7:10], v2 offset0:2 offset1:3
1782; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1783; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1784; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1785; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
1786; CHECK-NEXT:    s_setpc_b64 s[30:31]
1787entry:
1788  tail call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
1789  ret void
1790}
1791
; memmove of 16 bytes from addrspace(3) %src to addrspace(1) %dst, both align 16.
; Expected codegen: one ds_read_b128 and one global_store_dwordx4.
1792define void @memmove_p1_p3_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
1793; CHECK-LABEL: memmove_p1_p3_sz16_align_16_16:
1794; CHECK:       ; %bb.0: ; %entry
1795; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1796; CHECK-NEXT:    ds_read_b128 v[2:5], v2
1797; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1798; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1799; CHECK-NEXT:    s_setpc_b64 s[30:31]
1800entry:
1801  tail call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
1802  ret void
1803}
1804
; memmove of 31 bytes from addrspace(3) %src to addrspace(1) %dst, both align 16.
; Expected codegen: ds_read_b128 for the first 16 bytes, ds_read_b32 plus
; ds_read2_b32 for v[7:9] (stored as one dwordx3 at offset 16), and u16/u8
; reads for the last three bytes. All reads precede the first store.
1805define void @memmove_p1_p3_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
1806; CHECK-LABEL: memmove_p1_p3_sz31_align_16_16:
1807; CHECK:       ; %bb.0: ; %entry
1808; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1809; CHECK-NEXT:    ds_read_b128 v[3:6], v2
1810; CHECK-NEXT:    ds_read_b32 v7, v2 offset:16
1811; CHECK-NEXT:    ds_read_u8 v10, v2 offset:30
1812; CHECK-NEXT:    ds_read2_b32 v[8:9], v2 offset0:5 offset1:6
1813; CHECK-NEXT:    ds_read_u16 v2, v2 offset:28
1814; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
1815; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1816; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1817; CHECK-NEXT:    global_store_dwordx3 v[0:1], v[7:9], off offset:16
1818; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1819; CHECK-NEXT:    global_store_short v[0:1], v2, off offset:28
1820; CHECK-NEXT:    global_store_byte v[0:1], v10, off offset:30
1821; CHECK-NEXT:    s_setpc_b64 s[30:31]
1822entry:
1823  tail call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
1824  ret void
1825}
1826
; memmove of 32 bytes from addrspace(3) %src to addrspace(1) %dst, both align 16.
; Expected codegen: two ds_read_b128 (offsets 0 and 16), then two
; global_store_dwordx4 in the same order, each behind its own s_waitcnt lgkmcnt.
1827define void @memmove_p1_p3_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
1828; CHECK-LABEL: memmove_p1_p3_sz32_align_16_16:
1829; CHECK:       ; %bb.0: ; %entry
1830; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1831; CHECK-NEXT:    ds_read_b128 v[3:6], v2
1832; CHECK-NEXT:    ds_read_b128 v[7:10], v2 offset:16
1833; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
1834; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1835; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
1836; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
1837; CHECK-NEXT:    s_setpc_b64 s[30:31]
1838entry:
1839  tail call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
1840  ret void
1841}
1842
; memmove of 16 bytes from addrspace(4) %src to addrspace(1) %dst, both align 1.
; Expected codegen: the addrspace(4) source is read with one
; global_load_dwordx4, followed by one global_store_dwordx4.
1843define void @memmove_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
1844; CHECK-LABEL: memmove_p1_p4_sz16_align_1_1:
1845; CHECK:       ; %bb.0: ; %entry
1846; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1847; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1848; CHECK-NEXT:    s_waitcnt vmcnt(0)
1849; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1850; CHECK-NEXT:    s_setpc_b64 s[30:31]
1851entry:
1852  tail call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
1853  ret void
1854}
1855
; Copies 31 bytes from addrspace(4) to addrspace(1); both pointers are align 1.
; Expected output: one clause of four global loads (ubyte @30, ushort @28,
; dwordx3 @16, dwordx4 @0); each value is stored back at the same offset as
; soon as the decreasing vmcnt waits (3, 2, 1, 0) make it available.
1856define void @memmove_p1_p4_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
1857; CHECK-LABEL: memmove_p1_p4_sz31_align_1_1:
1858; CHECK:       ; %bb.0: ; %entry
1859; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1860; CHECK-NEXT:    s_clause 0x3
1861; CHECK-NEXT:    global_load_ubyte v9, v[2:3], off offset:30
1862; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
1863; CHECK-NEXT:    global_load_dwordx3 v[6:8], v[2:3], off offset:16
1864; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1865; CHECK-NEXT:    s_waitcnt vmcnt(3)
1866; CHECK-NEXT:    global_store_byte v[0:1], v9, off offset:30
1867; CHECK-NEXT:    s_waitcnt vmcnt(2)
1868; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
1869; CHECK-NEXT:    s_waitcnt vmcnt(1)
1870; CHECK-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off offset:16
1871; CHECK-NEXT:    s_waitcnt vmcnt(0)
1872; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1873; CHECK-NEXT:    s_setpc_b64 s[30:31]
1874entry:
1875  tail call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
1876  ret void
1877}
1878
; Copies 32 bytes from addrspace(4) to addrspace(1); both pointers are align 1.
; Expected output: two global_load_dwordx4 in one clause (offset 16 first),
; then two global_store_dwordx4 in the same order, gated by vmcnt(1)/vmcnt(0).
1879define void @memmove_p1_p4_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
1880; CHECK-LABEL: memmove_p1_p4_sz32_align_1_1:
1881; CHECK:       ; %bb.0: ; %entry
1882; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1883; CHECK-NEXT:    s_clause 0x1
1884; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
1885; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
1886; CHECK-NEXT:    s_waitcnt vmcnt(1)
1887; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1888; CHECK-NEXT:    s_waitcnt vmcnt(0)
1889; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1890; CHECK-NEXT:    s_setpc_b64 s[30:31]
1891entry:
1892  tail call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
1893  ret void
1894}
1895
; Copies 16 bytes from addrspace(4) to addrspace(1); both pointers are align 2.
; Expected output: a single global_load_dwordx4 / global_store_dwordx4 pair
; separated by a vmcnt(0) wait.
1896define void @memmove_p1_p4_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
1897; CHECK-LABEL: memmove_p1_p4_sz16_align_2_2:
1898; CHECK:       ; %bb.0: ; %entry
1899; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1900; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1901; CHECK-NEXT:    s_waitcnt vmcnt(0)
1902; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1903; CHECK-NEXT:    s_setpc_b64 s[30:31]
1904entry:
1905  tail call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
1906  ret void
1907}
1908
; Copies 31 bytes from addrspace(4) to addrspace(1); both pointers are align 2.
; Expected output: one clause of four global loads (ubyte @30, ushort @28,
; dwordx3 @16, dwordx4 @0); each value is stored back at the same offset as
; soon as the decreasing vmcnt waits (3, 2, 1, 0) make it available.
1909define void @memmove_p1_p4_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
1910; CHECK-LABEL: memmove_p1_p4_sz31_align_2_2:
1911; CHECK:       ; %bb.0: ; %entry
1912; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1913; CHECK-NEXT:    s_clause 0x3
1914; CHECK-NEXT:    global_load_ubyte v9, v[2:3], off offset:30
1915; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
1916; CHECK-NEXT:    global_load_dwordx3 v[6:8], v[2:3], off offset:16
1917; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1918; CHECK-NEXT:    s_waitcnt vmcnt(3)
1919; CHECK-NEXT:    global_store_byte v[0:1], v9, off offset:30
1920; CHECK-NEXT:    s_waitcnt vmcnt(2)
1921; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
1922; CHECK-NEXT:    s_waitcnt vmcnt(1)
1923; CHECK-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off offset:16
1924; CHECK-NEXT:    s_waitcnt vmcnt(0)
1925; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1926; CHECK-NEXT:    s_setpc_b64 s[30:31]
1927entry:
1928  tail call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
1929  ret void
1930}
1931
; Copies 32 bytes from addrspace(4) to addrspace(1); both pointers are align 2.
; Expected output: two global_load_dwordx4 in one clause (offset 16 first),
; then two global_store_dwordx4 in the same order, gated by vmcnt(1)/vmcnt(0).
1932define void @memmove_p1_p4_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
1933; CHECK-LABEL: memmove_p1_p4_sz32_align_2_2:
1934; CHECK:       ; %bb.0: ; %entry
1935; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1936; CHECK-NEXT:    s_clause 0x1
1937; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
1938; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
1939; CHECK-NEXT:    s_waitcnt vmcnt(1)
1940; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1941; CHECK-NEXT:    s_waitcnt vmcnt(0)
1942; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1943; CHECK-NEXT:    s_setpc_b64 s[30:31]
1944entry:
1945  tail call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
1946  ret void
1947}
1948
; Copies 16 bytes from addrspace(4) to addrspace(1); both pointers are align 8.
; Expected output: a single global_load_dwordx4 / global_store_dwordx4 pair
; separated by a vmcnt(0) wait.
1949define void @memmove_p1_p4_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
1950; CHECK-LABEL: memmove_p1_p4_sz16_align_8_8:
1951; CHECK:       ; %bb.0: ; %entry
1952; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1953; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
1954; CHECK-NEXT:    s_waitcnt vmcnt(0)
1955; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1956; CHECK-NEXT:    s_setpc_b64 s[30:31]
1957entry:
1958  tail call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
1959  ret void
1960}
1961
; Copies 31 bytes from addrspace(4) to addrspace(1); both pointers are align 8.
; Expected output: a clause of five global loads (dword @16, dwordx4 @0,
; ushort @28, dword @24, ubyte @30). The dword @16 is stored first so that v8
; can be reloaded from offset 20; v[8:9] (source offsets 20 and 24) are then
; written with one global_store_dwordx2 at offset 20. The tail is stored as
; byte @30 and short @28, and bytes 0..15 as a dwordx4.
; NOTE(review): bytes 16..27 are fetched with three separate dword loads here
; rather than one wider load; this mirrors the recorded llc output - confirm
; whether that lowering is intended before relying on it.
1962define void @memmove_p1_p4_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
1963; CHECK-LABEL: memmove_p1_p4_sz31_align_8_8:
1964; CHECK:       ; %bb.0: ; %entry
1965; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1966; CHECK-NEXT:    s_clause 0x4
1967; CHECK-NEXT:    global_load_dword v8, v[2:3], off offset:16
1968; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
1969; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
1970; CHECK-NEXT:    global_load_dword v9, v[2:3], off offset:24
1971; CHECK-NEXT:    global_load_ubyte v11, v[2:3], off offset:30
1972; CHECK-NEXT:    s_waitcnt vmcnt(4)
1973; CHECK-NEXT:    global_store_dword v[0:1], v8, off offset:16
1974; CHECK-NEXT:    global_load_dword v8, v[2:3], off offset:20
1975; CHECK-NEXT:    s_waitcnt vmcnt(1)
1976; CHECK-NEXT:    global_store_byte v[0:1], v11, off offset:30
1977; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
1978; CHECK-NEXT:    s_waitcnt vmcnt(0)
1979; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[8:9], off offset:20
1980; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1981; CHECK-NEXT:    s_setpc_b64 s[30:31]
1982entry:
1983  tail call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
1984  ret void
1985}
1986
; Copies 32 bytes from addrspace(4) to addrspace(1); both pointers are align 8.
; Expected output: two global_load_dwordx4 in one clause (offset 16 first),
; then two global_store_dwordx4 in the same order, gated by vmcnt(1)/vmcnt(0).
1987define void @memmove_p1_p4_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
1988; CHECK-LABEL: memmove_p1_p4_sz32_align_8_8:
1989; CHECK:       ; %bb.0: ; %entry
1990; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1991; CHECK-NEXT:    s_clause 0x1
1992; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
1993; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
1994; CHECK-NEXT:    s_waitcnt vmcnt(1)
1995; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
1996; CHECK-NEXT:    s_waitcnt vmcnt(0)
1997; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1998; CHECK-NEXT:    s_setpc_b64 s[30:31]
1999entry:
2000  tail call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
2001  ret void
2002}
2003
; Copies 16 bytes from addrspace(4) to addrspace(1); both pointers are align 16.
; Expected output: a single global_load_dwordx4 / global_store_dwordx4 pair
; separated by a vmcnt(0) wait.
2004define void @memmove_p1_p4_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
2005; CHECK-LABEL: memmove_p1_p4_sz16_align_16_16:
2006; CHECK:       ; %bb.0: ; %entry
2007; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2008; CHECK-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off
2009; CHECK-NEXT:    s_waitcnt vmcnt(0)
2010; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
2011; CHECK-NEXT:    s_setpc_b64 s[30:31]
2012entry:
2013  tail call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
2014  ret void
2015}
2016
; Copies 31 bytes from addrspace(4) to addrspace(1); both pointers are align 16.
; Expected output: a clause of four global loads (ubyte @30, dword @16,
; ushort @28, dwordx4 @0), after which bytes 16..27 are moved one dword at a
; time: store @16, reload v9 from @20 and store it, then load @24 into v2
; (the last use of the source address in v[2:3]) and store it. The tail is
; stored as byte @30 and short @28, and bytes 0..15 as a dwordx4.
; NOTE(review): the dword-by-dword handling of bytes 16..27 mirrors the
; recorded llc output - confirm whether that lowering is intended before
; relying on it.
2017define void @memmove_p1_p4_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
2018; CHECK-LABEL: memmove_p1_p4_sz31_align_16_16:
2019; CHECK:       ; %bb.0: ; %entry
2020; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2021; CHECK-NEXT:    s_clause 0x3
2022; CHECK-NEXT:    global_load_ubyte v8, v[2:3], off offset:30
2023; CHECK-NEXT:    global_load_dword v9, v[2:3], off offset:16
2024; CHECK-NEXT:    global_load_ushort v10, v[2:3], off offset:28
2025; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off
2026; CHECK-NEXT:    s_waitcnt vmcnt(2)
2027; CHECK-NEXT:    global_store_dword v[0:1], v9, off offset:16
2028; CHECK-NEXT:    global_load_dword v9, v[2:3], off offset:20
2029; CHECK-NEXT:    s_waitcnt vmcnt(0)
2030; CHECK-NEXT:    global_store_dword v[0:1], v9, off offset:20
2031; CHECK-NEXT:    global_load_dword v2, v[2:3], off offset:24
2032; CHECK-NEXT:    global_store_byte v[0:1], v8, off offset:30
2033; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
2034; CHECK-NEXT:    s_waitcnt vmcnt(0)
2035; CHECK-NEXT:    global_store_dword v[0:1], v2, off offset:24
2036; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
2037; CHECK-NEXT:    s_setpc_b64 s[30:31]
2038entry:
2039  tail call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
2040  ret void
2041}
2042
; Copies 32 bytes from addrspace(4) to addrspace(1); both pointers are align 16.
; Expected output: two global_load_dwordx4 in one clause (offset 16 first),
; then two global_store_dwordx4 in the same order, gated by vmcnt(1)/vmcnt(0).
2043define void @memmove_p1_p4_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
2044; CHECK-LABEL: memmove_p1_p4_sz32_align_16_16:
2045; CHECK:       ; %bb.0: ; %entry
2046; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2047; CHECK-NEXT:    s_clause 0x1
2048; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:16
2049; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off
2050; CHECK-NEXT:    s_waitcnt vmcnt(1)
2051; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:16
2052; CHECK-NEXT:    s_waitcnt vmcnt(0)
2053; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
2054; CHECK-NEXT:    s_setpc_b64 s[30:31]
2055entry:
2056  tail call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
2057  ret void
2058}
2059
; Copies 16 bytes from addrspace(5) to addrspace(1); both pointers are align 1.
; Expected output: four buffer_load_dword (offen, offsets 0/4/8/12) in one
; clause, then a single global_store_dwordx4 after a vmcnt(0) wait.
2060define void @memmove_p1_p5_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
2061; CHECK-LABEL: memmove_p1_p5_sz16_align_1_1:
2062; CHECK:       ; %bb.0: ; %entry
2063; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2064; CHECK-NEXT:    s_clause 0x3
2065; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2066; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2067; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2068; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2069; CHECK-NEXT:    s_waitcnt vmcnt(0)
2070; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2071; CHECK-NEXT:    s_setpc_b64 s[30:31]
2072entry:
2073  tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
2074  ret void
2075}
2076
; Copies 31 bytes from addrspace(5) to addrspace(1); both pointers are align 1.
; Expected output: nine buffer loads in one clause (ushort @28, ubyte @30,
; then seven dwords @0..24), stored as global short @28, byte @30, dwordx4 @0
; and dwordx3 @16, with vmcnt waits of 8, 7, 3 and 0 respectively.
2077define void @memmove_p1_p5_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
2078; CHECK-LABEL: memmove_p1_p5_sz31_align_1_1:
2079; CHECK:       ; %bb.0: ; %entry
2080; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2081; CHECK-NEXT:    s_clause 0x8
2082; CHECK-NEXT:    buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
2083; CHECK-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
2084; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2085; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2086; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2087; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2088; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2089; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2090; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2091; CHECK-NEXT:    s_waitcnt vmcnt(8)
2092; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
2093; CHECK-NEXT:    s_waitcnt vmcnt(7)
2094; CHECK-NEXT:    global_store_byte v[0:1], v11, off offset:30
2095; CHECK-NEXT:    s_waitcnt vmcnt(3)
2096; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2097; CHECK-NEXT:    s_waitcnt vmcnt(0)
2098; CHECK-NEXT:    global_store_dwordx3 v[0:1], v[7:9], off offset:16
2099; CHECK-NEXT:    s_setpc_b64 s[30:31]
2100entry:
2101  tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
2102  ret void
2103}
2104
; Copies 32 bytes from addrspace(5) to addrspace(1); both pointers are align 1.
; Expected output: eight buffer_load_dword (offen, offsets 0..28) in one
; clause, then global_store_dwordx4 @0 after vmcnt(4) and @16 after vmcnt(0).
2105define void @memmove_p1_p5_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
2106; CHECK-LABEL: memmove_p1_p5_sz32_align_1_1:
2107; CHECK:       ; %bb.0: ; %entry
2108; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2109; CHECK-NEXT:    s_clause 0x7
2110; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2111; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2112; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2113; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2114; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2115; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2116; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2117; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
2118; CHECK-NEXT:    s_waitcnt vmcnt(4)
2119; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2120; CHECK-NEXT:    s_waitcnt vmcnt(0)
2121; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
2122; CHECK-NEXT:    s_setpc_b64 s[30:31]
2123entry:
2124  tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
2125  ret void
2126}
2127
; Copies 16 bytes from addrspace(5) to addrspace(1); both pointers are align 2.
; Expected output: four buffer_load_dword (offen, offsets 0/4/8/12) in one
; clause, then a single global_store_dwordx4 after a vmcnt(0) wait.
2128define void @memmove_p1_p5_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
2129; CHECK-LABEL: memmove_p1_p5_sz16_align_2_2:
2130; CHECK:       ; %bb.0: ; %entry
2131; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2132; CHECK-NEXT:    s_clause 0x3
2133; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2134; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2135; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2136; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2137; CHECK-NEXT:    s_waitcnt vmcnt(0)
2138; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2139; CHECK-NEXT:    s_setpc_b64 s[30:31]
2140entry:
2141  tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
2142  ret void
2143}
2144
; Copies 31 bytes from addrspace(5) to addrspace(1); both pointers are align 2.
; Expected output: nine buffer loads in one clause (ushort @28, ubyte @30,
; then seven dwords @0..24), stored as global short @28, byte @30, dwordx4 @0
; and dwordx3 @16, with vmcnt waits of 8, 7, 3 and 0 respectively.
2145define void @memmove_p1_p5_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
2146; CHECK-LABEL: memmove_p1_p5_sz31_align_2_2:
2147; CHECK:       ; %bb.0: ; %entry
2148; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2149; CHECK-NEXT:    s_clause 0x8
2150; CHECK-NEXT:    buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
2151; CHECK-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
2152; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2153; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2154; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2155; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2156; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2157; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2158; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2159; CHECK-NEXT:    s_waitcnt vmcnt(8)
2160; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
2161; CHECK-NEXT:    s_waitcnt vmcnt(7)
2162; CHECK-NEXT:    global_store_byte v[0:1], v11, off offset:30
2163; CHECK-NEXT:    s_waitcnt vmcnt(3)
2164; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2165; CHECK-NEXT:    s_waitcnt vmcnt(0)
2166; CHECK-NEXT:    global_store_dwordx3 v[0:1], v[7:9], off offset:16
2167; CHECK-NEXT:    s_setpc_b64 s[30:31]
2168entry:
2169  tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
2170  ret void
2171}
2172
; Copies 32 bytes from addrspace(5) to addrspace(1); both pointers are align 2.
; Expected output: eight buffer_load_dword (offen, offsets 0..28) in one
; clause, then global_store_dwordx4 @0 after vmcnt(4) and @16 after vmcnt(0).
2173define void @memmove_p1_p5_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
2174; CHECK-LABEL: memmove_p1_p5_sz32_align_2_2:
2175; CHECK:       ; %bb.0: ; %entry
2176; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2177; CHECK-NEXT:    s_clause 0x7
2178; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2179; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2180; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2181; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2182; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2183; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2184; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2185; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
2186; CHECK-NEXT:    s_waitcnt vmcnt(4)
2187; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2188; CHECK-NEXT:    s_waitcnt vmcnt(0)
2189; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
2190; CHECK-NEXT:    s_setpc_b64 s[30:31]
2191entry:
2192  tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
2193  ret void
2194}
2195
; Copies 16 bytes from addrspace(5) to addrspace(1); both pointers are align 8.
; Expected output: four buffer_load_dword (offen, offsets 0/4/8/12) in one
; clause, then a single global_store_dwordx4 after a vmcnt(0) wait.
2196define void @memmove_p1_p5_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
2197; CHECK-LABEL: memmove_p1_p5_sz16_align_8_8:
2198; CHECK:       ; %bb.0: ; %entry
2199; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2200; CHECK-NEXT:    s_clause 0x3
2201; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2202; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2203; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2204; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2205; CHECK-NEXT:    s_waitcnt vmcnt(0)
2206; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2207; CHECK-NEXT:    s_setpc_b64 s[30:31]
2208entry:
2209  tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
2210  ret void
2211}
2212
; Copies 31 bytes from addrspace(5) to addrspace(1); both pointers are align 8.
; Expected output: nine buffer loads in one clause (ushort @28, ubyte @30,
; then seven dwords @0..24), stored as global short @28, byte @30, dwordx4 @0
; and dwordx3 @16, with vmcnt waits of 8, 7, 3 and 0 respectively.
2213define void @memmove_p1_p5_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
2214; CHECK-LABEL: memmove_p1_p5_sz31_align_8_8:
2215; CHECK:       ; %bb.0: ; %entry
2216; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2217; CHECK-NEXT:    s_clause 0x8
2218; CHECK-NEXT:    buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
2219; CHECK-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
2220; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2221; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2222; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2223; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2224; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2225; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2226; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2227; CHECK-NEXT:    s_waitcnt vmcnt(8)
2228; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
2229; CHECK-NEXT:    s_waitcnt vmcnt(7)
2230; CHECK-NEXT:    global_store_byte v[0:1], v11, off offset:30
2231; CHECK-NEXT:    s_waitcnt vmcnt(3)
2232; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2233; CHECK-NEXT:    s_waitcnt vmcnt(0)
2234; CHECK-NEXT:    global_store_dwordx3 v[0:1], v[7:9], off offset:16
2235; CHECK-NEXT:    s_setpc_b64 s[30:31]
2236entry:
2237  tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
2238  ret void
2239}
2240
; Copies 32 bytes from addrspace(5) to addrspace(1); both pointers are align 8.
; Expected output: eight buffer_load_dword (offen, offsets 0..28) in one
; clause, then global_store_dwordx4 @0 after vmcnt(4) and @16 after vmcnt(0).
2241define void @memmove_p1_p5_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
2242; CHECK-LABEL: memmove_p1_p5_sz32_align_8_8:
2243; CHECK:       ; %bb.0: ; %entry
2244; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2245; CHECK-NEXT:    s_clause 0x7
2246; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2247; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2248; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2249; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2250; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2251; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2252; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2253; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
2254; CHECK-NEXT:    s_waitcnt vmcnt(4)
2255; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2256; CHECK-NEXT:    s_waitcnt vmcnt(0)
2257; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
2258; CHECK-NEXT:    s_setpc_b64 s[30:31]
2259entry:
2260  tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
2261  ret void
2262}
2263
; Copies 16 bytes from addrspace(5) to addrspace(1); both pointers are align 16.
; Expected output: four buffer_load_dword (offen, offsets 0/4/8/12) in one
; clause, then a single global_store_dwordx4 after a vmcnt(0) wait.
2264define void @memmove_p1_p5_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
2265; CHECK-LABEL: memmove_p1_p5_sz16_align_16_16:
2266; CHECK:       ; %bb.0: ; %entry
2267; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2268; CHECK-NEXT:    s_clause 0x3
2269; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2270; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2271; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2272; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2273; CHECK-NEXT:    s_waitcnt vmcnt(0)
2274; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2275; CHECK-NEXT:    s_setpc_b64 s[30:31]
2276entry:
2277  tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
2278  ret void
2279}
2280
; Copies 31 bytes from addrspace(5) to addrspace(1); both pointers are align 16.
; Expected output: nine buffer loads in one clause (ushort @28, ubyte @30,
; then seven dwords @0..24), stored as global short @28, byte @30, dwordx4 @0
; and dwordx3 @16, with vmcnt waits of 8, 7, 3 and 0 respectively.
2281define void @memmove_p1_p5_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
2282; CHECK-LABEL: memmove_p1_p5_sz31_align_16_16:
2283; CHECK:       ; %bb.0: ; %entry
2284; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2285; CHECK-NEXT:    s_clause 0x8
2286; CHECK-NEXT:    buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
2287; CHECK-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
2288; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2289; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2290; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2291; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2292; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2293; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2294; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2295; CHECK-NEXT:    s_waitcnt vmcnt(8)
2296; CHECK-NEXT:    global_store_short v[0:1], v10, off offset:28
2297; CHECK-NEXT:    s_waitcnt vmcnt(7)
2298; CHECK-NEXT:    global_store_byte v[0:1], v11, off offset:30
2299; CHECK-NEXT:    s_waitcnt vmcnt(3)
2300; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2301; CHECK-NEXT:    s_waitcnt vmcnt(0)
2302; CHECK-NEXT:    global_store_dwordx3 v[0:1], v[7:9], off offset:16
2303; CHECK-NEXT:    s_setpc_b64 s[30:31]
2304entry:
2305  tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
2306  ret void
2307}
2308
; Copies 32 bytes from addrspace(5) to addrspace(1); both pointers are align 16.
; Expected output: eight buffer_load_dword (offen, offsets 0..28) in one
; clause, then global_store_dwordx4 @0 after vmcnt(4) and @16 after vmcnt(0).
2309define void @memmove_p1_p5_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
2310; CHECK-LABEL: memmove_p1_p5_sz32_align_16_16:
2311; CHECK:       ; %bb.0: ; %entry
2312; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2313; CHECK-NEXT:    s_clause 0x7
2314; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
2315; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
2316; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
2317; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
2318; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
2319; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
2320; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
2321; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
2322; CHECK-NEXT:    s_waitcnt vmcnt(4)
2323; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
2324; CHECK-NEXT:    s_waitcnt vmcnt(0)
2325; CHECK-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off offset:16
2326; CHECK-NEXT:    s_setpc_b64 s[30:31]
2327entry:
2328  tail call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
2329  ret void
2330}
2331
; Copies 16 bytes from addrspace(0) to addrspace(3); both pointers are align 1.
; Expected output: one flat_load_dwordx4, then one ds_write2_b64 (offset1:1)
; writing both 8-byte halves, followed by an lgkmcnt(0) wait before returning.
2332define void @memmove_p3_p0_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
2333; CHECK-LABEL: memmove_p3_p0_sz16_align_1_1:
2334; CHECK:       ; %bb.0: ; %entry
2335; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2336; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
2337; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2338; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2339; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2340; CHECK-NEXT:    s_setpc_b64 s[30:31]
2341entry:
2342  tail call void @llvm.memmove.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
2343  ret void
2344}
2345
; Copies 31 bytes from addrspace(0) to addrspace(3); both pointers are align 1.
; Expected output: four flat loads in one clause (ubyte @30, ushort @28,
; dwordx3 @16, dwordx4 @0), written back as ds_write_b8 @30, ds_write_b16 @28,
; ds_write_b32 @24 plus ds_write_b64 @16 (the dwordx3 split in two), and a
; ds_write2_b64 for bytes 0..15. Each wait names both vmcnt and lgkmcnt.
2346define void @memmove_p3_p0_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
2347; CHECK-LABEL: memmove_p3_p0_sz31_align_1_1:
2348; CHECK:       ; %bb.0: ; %entry
2349; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2350; CHECK-NEXT:    s_clause 0x3
2351; CHECK-NEXT:    flat_load_ubyte v8, v[1:2] offset:30
2352; CHECK-NEXT:    flat_load_ushort v9, v[1:2] offset:28
2353; CHECK-NEXT:    flat_load_dwordx3 v[5:7], v[1:2] offset:16
2354; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
2355; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
2356; CHECK-NEXT:    ds_write_b8 v0, v8 offset:30
2357; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(3)
2358; CHECK-NEXT:    ds_write_b16 v0, v9 offset:28
2359; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(3)
2360; CHECK-NEXT:    ds_write_b32 v0, v7 offset:24
2361; CHECK-NEXT:    ds_write_b64 v0, v[5:6] offset:16
2362; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(4)
2363; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2364; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2365; CHECK-NEXT:    s_setpc_b64 s[30:31]
2366entry:
2367  tail call void @llvm.memmove.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
2368  ret void
2369}
2370
; Copies 32 bytes from addrspace(0) to addrspace(3); both pointers are align 1.
; Expected output: two flat_load_dwordx4 in one clause (offset 16 first), each
; written with a ds_write2_b64: offset0:2 offset1:3 for the upper 16 bytes,
; then offset1:1 for the lower 16 bytes.
2371define void @memmove_p3_p0_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
2372; CHECK-LABEL: memmove_p3_p0_sz32_align_1_1:
2373; CHECK:       ; %bb.0: ; %entry
2374; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2375; CHECK-NEXT:    s_clause 0x1
2376; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
2377; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
2378; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
2379; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
2380; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
2381; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
2382; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2383; CHECK-NEXT:    s_setpc_b64 s[30:31]
2384entry:
2385  tail call void @llvm.memmove.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
2386  ret void
2387}
2388
; Copies 16 bytes from addrspace(0) to addrspace(3); both pointers are align 2.
; Expected output: one flat_load_dwordx4, then one ds_write2_b64 (offset1:1)
; writing both 8-byte halves, followed by an lgkmcnt(0) wait before returning.
2389define void @memmove_p3_p0_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
2390; CHECK-LABEL: memmove_p3_p0_sz16_align_2_2:
2391; CHECK:       ; %bb.0: ; %entry
2392; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2393; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
2394; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2395; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2396; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2397; CHECK-NEXT:    s_setpc_b64 s[30:31]
2398entry:
2399  tail call void @llvm.memmove.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
2400  ret void
2401}
2402
; Copies 31 bytes from addrspace(0) to addrspace(3); both pointers are align 2.
; Expected output: four flat loads in one clause (ubyte @30, ushort @28,
; dwordx3 @16, dwordx4 @0), written back as ds_write_b8 @30, ds_write_b16 @28,
; ds_write_b32 @24 plus ds_write_b64 @16 (the dwordx3 split in two), and a
; ds_write2_b64 for bytes 0..15. Each wait names both vmcnt and lgkmcnt.
2403define void @memmove_p3_p0_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
2404; CHECK-LABEL: memmove_p3_p0_sz31_align_2_2:
2405; CHECK:       ; %bb.0: ; %entry
2406; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2407; CHECK-NEXT:    s_clause 0x3
2408; CHECK-NEXT:    flat_load_ubyte v8, v[1:2] offset:30
2409; CHECK-NEXT:    flat_load_ushort v9, v[1:2] offset:28
2410; CHECK-NEXT:    flat_load_dwordx3 v[5:7], v[1:2] offset:16
2411; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
2412; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
2413; CHECK-NEXT:    ds_write_b8 v0, v8 offset:30
2414; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(3)
2415; CHECK-NEXT:    ds_write_b16 v0, v9 offset:28
2416; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(3)
2417; CHECK-NEXT:    ds_write_b32 v0, v7 offset:24
2418; CHECK-NEXT:    ds_write_b64 v0, v[5:6] offset:16
2419; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(4)
2420; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2421; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2422; CHECK-NEXT:    s_setpc_b64 s[30:31]
2423entry:
2424  tail call void @llvm.memmove.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
2425  ret void
2426}
2427
; Copies 32 bytes from addrspace(0) to addrspace(3); both pointers are align 2.
; Expected output: two flat_load_dwordx4 in one clause (offset 16 first), each
; written with a ds_write2_b64: offset0:2 offset1:3 for the upper 16 bytes,
; then offset1:1 for the lower 16 bytes.
2428define void @memmove_p3_p0_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
2429; CHECK-LABEL: memmove_p3_p0_sz32_align_2_2:
2430; CHECK:       ; %bb.0: ; %entry
2431; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2432; CHECK-NEXT:    s_clause 0x1
2433; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
2434; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
2435; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
2436; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
2437; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
2438; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
2439; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2440; CHECK-NEXT:    s_setpc_b64 s[30:31]
2441entry:
2442  tail call void @llvm.memmove.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
2443  ret void
2444}
2445
; memmove of 16 bytes from an addrspace(0) source into an addrspace(3)
; destination; both pointers are 8-byte aligned, non-volatile.
; Expected code: a single flat_load_dwordx4 followed by one ds_write2_b64
; that stores the two 8-byte halves.
2446define void @memmove_p3_p0_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
2447; CHECK-LABEL: memmove_p3_p0_sz16_align_8_8:
2448; CHECK:       ; %bb.0: ; %entry
2449; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2450; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
2451; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2452; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2453; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2454; CHECK-NEXT:    s_setpc_b64 s[30:31]
2455entry:
2456  tail call void @llvm.memmove.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
2457  ret void
2458}
2459
; memmove of 31 bytes from an addrspace(0) source into an addrspace(3)
; destination; both pointers are 8-byte aligned, non-volatile.
; Expected code: four flat loads covering 16 + 12 + 1 + 2 bytes are issued
; before any store. The 12-byte piece is written as a ds_write2_b32 plus a
; ds_write_b32; the tail is written with ds_write_b8 and ds_write_b16.
2460define void @memmove_p3_p0_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
2461; CHECK-LABEL: memmove_p3_p0_sz31_align_8_8:
2462; CHECK:       ; %bb.0: ; %entry
2463; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2464; CHECK-NEXT:    s_clause 0x3
2465; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2]
2466; CHECK-NEXT:    flat_load_dwordx3 v[7:9], v[1:2] offset:16
2467; CHECK-NEXT:    flat_load_ubyte v10, v[1:2] offset:30
2468; CHECK-NEXT:    flat_load_ushort v1, v[1:2] offset:28
2469; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
2470; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2471; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(3)
2472; CHECK-NEXT:    ds_write2_b32 v0, v7, v8 offset0:4 offset1:5
2473; CHECK-NEXT:    ds_write_b32 v0, v9 offset:24
2474; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(4)
2475; CHECK-NEXT:    ds_write_b8 v0, v10 offset:30
2476; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(4)
2477; CHECK-NEXT:    ds_write_b16 v0, v1 offset:28
2478; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2479; CHECK-NEXT:    s_setpc_b64 s[30:31]
2480entry:
2481  tail call void @llvm.memmove.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
2482  ret void
2483}
2484
; memmove of 32 bytes from an addrspace(0) source into an addrspace(3)
; destination; both pointers are 8-byte aligned, non-volatile.
; Expected code: two flat_load_dwordx4 (offsets 16 and 0) are issued before
; any store, then each 16-byte half is written with one ds_write2_b64.
2485define void @memmove_p3_p0_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
2486; CHECK-LABEL: memmove_p3_p0_sz32_align_8_8:
2487; CHECK:       ; %bb.0: ; %entry
2488; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2489; CHECK-NEXT:    s_clause 0x1
2490; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
2491; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
2492; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
2493; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
2494; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
2495; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
2496; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2497; CHECK-NEXT:    s_setpc_b64 s[30:31]
2498entry:
2499  tail call void @llvm.memmove.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
2500  ret void
2501}
2502
; memmove of 16 bytes from an addrspace(0) source into an addrspace(3)
; destination; both pointers are 16-byte aligned, non-volatile.
; Expected code: a single flat_load_dwordx4 followed by a single
; ds_write_b128 for the whole copy.
2503define void @memmove_p3_p0_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
2504; CHECK-LABEL: memmove_p3_p0_sz16_align_16_16:
2505; CHECK:       ; %bb.0: ; %entry
2506; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2507; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
2508; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2509; CHECK-NEXT:    ds_write_b128 v0, v[1:4]
2510; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2511; CHECK-NEXT:    s_setpc_b64 s[30:31]
2512entry:
2513  tail call void @llvm.memmove.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
2514  ret void
2515}
2516
; memmove of 31 bytes from an addrspace(0) source into an addrspace(3)
; destination; both pointers are 16-byte aligned, non-volatile.
; Expected code: the bytes at offsets 16..27 are moved as three separate
; dwords (offsets 16, 20 and 24) rather than as one 12-byte load, and the
; flat loads for offsets 20 and 24 appear after earlier ds_write
; instructions.
; NOTE(review): every other test in this part of the file expects all loads
; to be issued before the first store; here loads and stores are
; interleaved. Confirm this schedule is the intended memmove lowering before
; relying on these assertions.
2517define void @memmove_p3_p0_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
2518; CHECK-LABEL: memmove_p3_p0_sz31_align_16_16:
2519; CHECK:       ; %bb.0: ; %entry
2520; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2521; CHECK-NEXT:    s_clause 0x3
2522; CHECK-NEXT:    flat_load_ubyte v7, v[1:2] offset:30
2523; CHECK-NEXT:    flat_load_dword v8, v[1:2] offset:16
2524; CHECK-NEXT:    flat_load_ushort v9, v[1:2] offset:28
2525; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2]
2526; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
2527; CHECK-NEXT:    ds_write_b32 v0, v8 offset:16
2528; CHECK-NEXT:    flat_load_dword v8, v[1:2] offset:20
2529; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2530; CHECK-NEXT:    ds_write_b32 v0, v8 offset:20
2531; CHECK-NEXT:    flat_load_dword v1, v[1:2] offset:24
2532; CHECK-NEXT:    ds_write_b8 v0, v7 offset:30
2533; CHECK-NEXT:    ds_write_b16 v0, v9 offset:28
2534; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(2)
2535; CHECK-NEXT:    ds_write_b32 v0, v1 offset:24
2536; CHECK-NEXT:    ds_write_b128 v0, v[3:6]
2537; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2538; CHECK-NEXT:    s_setpc_b64 s[30:31]
2539entry:
2540  tail call void @llvm.memmove.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
2541  ret void
2542}
2543
; memmove of 32 bytes from an addrspace(0) source into an addrspace(3)
; destination; both pointers are 16-byte aligned, non-volatile.
; Expected code: two flat_load_dwordx4 (offsets 16 and 0) are issued before
; any store, then each 16-byte half is written with one ds_write_b128.
2544define void @memmove_p3_p0_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
2545; CHECK-LABEL: memmove_p3_p0_sz32_align_16_16:
2546; CHECK:       ; %bb.0: ; %entry
2547; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2548; CHECK-NEXT:    s_clause 0x1
2549; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
2550; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
2551; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
2552; CHECK-NEXT:    ds_write_b128 v0, v[3:6] offset:16
2553; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(1)
2554; CHECK-NEXT:    ds_write_b128 v0, v[7:10]
2555; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2556; CHECK-NEXT:    s_setpc_b64 s[30:31]
2557entry:
2558  tail call void @llvm.memmove.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
2559  ret void
2560}
2561
; memmove of 16 bytes from an addrspace(1) source into an addrspace(3)
; destination; both pointers are only 1-byte aligned, non-volatile.
; Expected code: a single global_load_dwordx4 followed by one ds_write2_b64
; that stores the two 8-byte halves.
2562define void @memmove_p3_p1_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
2563; CHECK-LABEL: memmove_p3_p1_sz16_align_1_1:
2564; CHECK:       ; %bb.0: ; %entry
2565; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2566; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
2567; CHECK-NEXT:    s_waitcnt vmcnt(0)
2568; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2569; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2570; CHECK-NEXT:    s_setpc_b64 s[30:31]
2571entry:
2572  tail call void @llvm.memmove.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
2573  ret void
2574}
2575
; memmove of 31 bytes from an addrspace(1) source into an addrspace(3)
; destination; both pointers are only 1-byte aligned, non-volatile.
; Expected code: four global loads covering 16 + 12 + 2 + 1 bytes are issued
; before any store. The 12-byte piece is written as ds_write_b64 plus
; ds_write_b32; the tail is written with ds_write_b16 and ds_write_b8.
2576define void @memmove_p3_p1_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
2577; CHECK-LABEL: memmove_p3_p1_sz31_align_1_1:
2578; CHECK:       ; %bb.0: ; %entry
2579; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2580; CHECK-NEXT:    s_clause 0x3
2581; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2582; CHECK-NEXT:    global_load_dwordx3 v[7:9], v[1:2], off offset:16
2583; CHECK-NEXT:    global_load_ushort v10, v[1:2], off offset:28
2584; CHECK-NEXT:    global_load_ubyte v1, v[1:2], off offset:30
2585; CHECK-NEXT:    s_waitcnt vmcnt(3)
2586; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2587; CHECK-NEXT:    s_waitcnt vmcnt(2)
2588; CHECK-NEXT:    ds_write_b64 v0, v[7:8] offset:16
2589; CHECK-NEXT:    ds_write_b32 v0, v9 offset:24
2590; CHECK-NEXT:    s_waitcnt vmcnt(1)
2591; CHECK-NEXT:    ds_write_b16 v0, v10 offset:28
2592; CHECK-NEXT:    s_waitcnt vmcnt(0)
2593; CHECK-NEXT:    ds_write_b8 v0, v1 offset:30
2594; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2595; CHECK-NEXT:    s_setpc_b64 s[30:31]
2596entry:
2597  tail call void @llvm.memmove.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
2598  ret void
2599}
2600
; memmove of 32 bytes from an addrspace(1) source into an addrspace(3)
; destination; both pointers are only 1-byte aligned, non-volatile.
; Expected code: two global_load_dwordx4 (offsets 0 and 16) are issued
; before any store, then each 16-byte half is written with one
; ds_write2_b64.
2601define void @memmove_p3_p1_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
2602; CHECK-LABEL: memmove_p3_p1_sz32_align_1_1:
2603; CHECK:       ; %bb.0: ; %entry
2604; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2605; CHECK-NEXT:    s_clause 0x1
2606; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2607; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
2608; CHECK-NEXT:    s_waitcnt vmcnt(1)
2609; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2610; CHECK-NEXT:    s_waitcnt vmcnt(0)
2611; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2612; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2613; CHECK-NEXT:    s_setpc_b64 s[30:31]
2614entry:
2615  tail call void @llvm.memmove.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
2616  ret void
2617}
2618
; memmove of 16 bytes from an addrspace(1) source into an addrspace(3)
; destination; both pointers are 2-byte aligned, non-volatile.
; Expected code: a single global_load_dwordx4 followed by one ds_write2_b64
; that stores the two 8-byte halves.
2619define void @memmove_p3_p1_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
2620; CHECK-LABEL: memmove_p3_p1_sz16_align_2_2:
2621; CHECK:       ; %bb.0: ; %entry
2622; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2623; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
2624; CHECK-NEXT:    s_waitcnt vmcnt(0)
2625; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2626; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2627; CHECK-NEXT:    s_setpc_b64 s[30:31]
2628entry:
2629  tail call void @llvm.memmove.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
2630  ret void
2631}
2632
; memmove of 31 bytes from an addrspace(1) source into an addrspace(3)
; destination; both pointers are 2-byte aligned, non-volatile.
; Expected code: four global loads covering 16 + 12 + 2 + 1 bytes are issued
; before any store. The 12-byte piece is written as ds_write_b64 plus
; ds_write_b32; the tail is written with ds_write_b16 and ds_write_b8.
2633define void @memmove_p3_p1_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
2634; CHECK-LABEL: memmove_p3_p1_sz31_align_2_2:
2635; CHECK:       ; %bb.0: ; %entry
2636; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2637; CHECK-NEXT:    s_clause 0x3
2638; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2639; CHECK-NEXT:    global_load_dwordx3 v[7:9], v[1:2], off offset:16
2640; CHECK-NEXT:    global_load_ushort v10, v[1:2], off offset:28
2641; CHECK-NEXT:    global_load_ubyte v1, v[1:2], off offset:30
2642; CHECK-NEXT:    s_waitcnt vmcnt(3)
2643; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2644; CHECK-NEXT:    s_waitcnt vmcnt(2)
2645; CHECK-NEXT:    ds_write_b64 v0, v[7:8] offset:16
2646; CHECK-NEXT:    ds_write_b32 v0, v9 offset:24
2647; CHECK-NEXT:    s_waitcnt vmcnt(1)
2648; CHECK-NEXT:    ds_write_b16 v0, v10 offset:28
2649; CHECK-NEXT:    s_waitcnt vmcnt(0)
2650; CHECK-NEXT:    ds_write_b8 v0, v1 offset:30
2651; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2652; CHECK-NEXT:    s_setpc_b64 s[30:31]
2653entry:
2654  tail call void @llvm.memmove.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
2655  ret void
2656}
2657
; memmove of 32 bytes from an addrspace(1) source into an addrspace(3)
; destination; both pointers are 2-byte aligned, non-volatile.
; Expected code: two global_load_dwordx4 (offsets 0 and 16) are issued
; before any store, then each 16-byte half is written with one
; ds_write2_b64.
2658define void @memmove_p3_p1_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
2659; CHECK-LABEL: memmove_p3_p1_sz32_align_2_2:
2660; CHECK:       ; %bb.0: ; %entry
2661; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2662; CHECK-NEXT:    s_clause 0x1
2663; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2664; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
2665; CHECK-NEXT:    s_waitcnt vmcnt(1)
2666; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2667; CHECK-NEXT:    s_waitcnt vmcnt(0)
2668; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2669; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2670; CHECK-NEXT:    s_setpc_b64 s[30:31]
2671entry:
2672  tail call void @llvm.memmove.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
2673  ret void
2674}
2675
; memmove of 16 bytes from an addrspace(1) source into an addrspace(3)
; destination; both pointers are 8-byte aligned, non-volatile.
; Expected code: a single global_load_dwordx4 followed by one ds_write2_b64
; that stores the two 8-byte halves.
2676define void @memmove_p3_p1_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
2677; CHECK-LABEL: memmove_p3_p1_sz16_align_8_8:
2678; CHECK:       ; %bb.0: ; %entry
2679; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2680; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
2681; CHECK-NEXT:    s_waitcnt vmcnt(0)
2682; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2683; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2684; CHECK-NEXT:    s_setpc_b64 s[30:31]
2685entry:
2686  tail call void @llvm.memmove.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
2687  ret void
2688}
2689
; memmove of 31 bytes from an addrspace(1) source into an addrspace(3)
; destination; both pointers are 8-byte aligned, non-volatile.
; Expected code: four global loads covering 16 + 12 + 2 + 1 bytes are issued
; before any store. The 12-byte piece is written as a ds_write2_b32
; (offset0:5 offset1:6) plus a ds_write_b32 at offset:16; the tail is
; written with ds_write_b16 and ds_write_b8.
2690define void @memmove_p3_p1_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
2691; CHECK-LABEL: memmove_p3_p1_sz31_align_8_8:
2692; CHECK:       ; %bb.0: ; %entry
2693; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2694; CHECK-NEXT:    s_clause 0x3
2695; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2696; CHECK-NEXT:    global_load_dwordx3 v[7:9], v[1:2], off offset:16
2697; CHECK-NEXT:    global_load_ushort v10, v[1:2], off offset:28
2698; CHECK-NEXT:    global_load_ubyte v1, v[1:2], off offset:30
2699; CHECK-NEXT:    s_waitcnt vmcnt(3)
2700; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2701; CHECK-NEXT:    s_waitcnt vmcnt(2)
2702; CHECK-NEXT:    ds_write2_b32 v0, v8, v9 offset0:5 offset1:6
2703; CHECK-NEXT:    ds_write_b32 v0, v7 offset:16
2704; CHECK-NEXT:    s_waitcnt vmcnt(1)
2705; CHECK-NEXT:    ds_write_b16 v0, v10 offset:28
2706; CHECK-NEXT:    s_waitcnt vmcnt(0)
2707; CHECK-NEXT:    ds_write_b8 v0, v1 offset:30
2708; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2709; CHECK-NEXT:    s_setpc_b64 s[30:31]
2710entry:
2711  tail call void @llvm.memmove.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
2712  ret void
2713}
2714
; memmove of 32 bytes from an addrspace(1) source into an addrspace(3)
; destination; both pointers are 8-byte aligned, non-volatile.
; Expected code: two global_load_dwordx4 (offsets 0 and 16) are issued
; before any store, then each 16-byte half is written with one
; ds_write2_b64.
2715define void @memmove_p3_p1_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
2716; CHECK-LABEL: memmove_p3_p1_sz32_align_8_8:
2717; CHECK:       ; %bb.0: ; %entry
2718; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2719; CHECK-NEXT:    s_clause 0x1
2720; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2721; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
2722; CHECK-NEXT:    s_waitcnt vmcnt(1)
2723; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
2724; CHECK-NEXT:    s_waitcnt vmcnt(0)
2725; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
2726; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2727; CHECK-NEXT:    s_setpc_b64 s[30:31]
2728entry:
2729  tail call void @llvm.memmove.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
2730  ret void
2731}
2732
; memmove of 16 bytes from an addrspace(1) source into an addrspace(3)
; destination; both pointers are 16-byte aligned, non-volatile.
; Expected code: a single global_load_dwordx4 followed by a single
; ds_write_b128 for the whole copy.
2733define void @memmove_p3_p1_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
2734; CHECK-LABEL: memmove_p3_p1_sz16_align_16_16:
2735; CHECK:       ; %bb.0: ; %entry
2736; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2737; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
2738; CHECK-NEXT:    s_waitcnt vmcnt(0)
2739; CHECK-NEXT:    ds_write_b128 v0, v[1:4]
2740; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2741; CHECK-NEXT:    s_setpc_b64 s[30:31]
2742entry:
2743  tail call void @llvm.memmove.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
2744  ret void
2745}
2746
; memmove of 31 bytes from an addrspace(1) source into an addrspace(3)
; destination; both pointers are 16-byte aligned, non-volatile.
; Expected code: four global loads covering 12 + 16 + 2 + 1 bytes are issued
; before any store (the 12-byte load at offset 16 comes first). The first
; 16 bytes are written with ds_write_b128; the 12-byte piece is written as
; a ds_write2_b32 plus a ds_write_b32; the tail is written with
; ds_write_b16 and ds_write_b8.
2747define void @memmove_p3_p1_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
2748; CHECK-LABEL: memmove_p3_p1_sz31_align_16_16:
2749; CHECK:       ; %bb.0: ; %entry
2750; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2751; CHECK-NEXT:    s_clause 0x3
2752; CHECK-NEXT:    global_load_dwordx3 v[7:9], v[1:2], off offset:16
2753; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2754; CHECK-NEXT:    global_load_ushort v10, v[1:2], off offset:28
2755; CHECK-NEXT:    global_load_ubyte v1, v[1:2], off offset:30
2756; CHECK-NEXT:    s_waitcnt vmcnt(3)
2757; CHECK-NEXT:    ds_write2_b32 v0, v8, v9 offset0:5 offset1:6
2758; CHECK-NEXT:    s_waitcnt vmcnt(2)
2759; CHECK-NEXT:    ds_write_b128 v0, v[3:6]
2760; CHECK-NEXT:    ds_write_b32 v0, v7 offset:16
2761; CHECK-NEXT:    s_waitcnt vmcnt(1)
2762; CHECK-NEXT:    ds_write_b16 v0, v10 offset:28
2763; CHECK-NEXT:    s_waitcnt vmcnt(0)
2764; CHECK-NEXT:    ds_write_b8 v0, v1 offset:30
2765; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2766; CHECK-NEXT:    s_setpc_b64 s[30:31]
2767entry:
2768  tail call void @llvm.memmove.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
2769  ret void
2770}
2771
; memmove of 32 bytes from an addrspace(1) source into an addrspace(3)
; destination; both pointers are 16-byte aligned, non-volatile.
; Expected code: two global_load_dwordx4 (offsets 0 and 16) are issued
; before any store, then each 16-byte half is written with one
; ds_write_b128.
2772define void @memmove_p3_p1_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
2773; CHECK-LABEL: memmove_p3_p1_sz32_align_16_16:
2774; CHECK:       ; %bb.0: ; %entry
2775; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2776; CHECK-NEXT:    s_clause 0x1
2777; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
2778; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
2779; CHECK-NEXT:    s_waitcnt vmcnt(1)
2780; CHECK-NEXT:    ds_write_b128 v0, v[3:6]
2781; CHECK-NEXT:    s_waitcnt vmcnt(0)
2782; CHECK-NEXT:    ds_write_b128 v0, v[7:10] offset:16
2783; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2784; CHECK-NEXT:    s_setpc_b64 s[30:31]
2785entry:
2786  tail call void @llvm.memmove.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
2787  ret void
2788}
2789
; memmove of 16 bytes where both source and destination are addrspace(3)
; pointers; both are only 1-byte aligned, non-volatile.
; Expected code: one ds_read2_b64 followed by one ds_write2_b64, with an
; lgkmcnt wait between them.
2790define void @memmove_p3_p3_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
2791; CHECK-LABEL: memmove_p3_p3_sz16_align_1_1:
2792; CHECK:       ; %bb.0: ; %entry
2793; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2794; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
2795; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2796; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2797; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2798; CHECK-NEXT:    s_setpc_b64 s[30:31]
2799entry:
2800  tail call void @llvm.memmove.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
2801  ret void
2802}
2803
; memmove of 31 bytes where both source and destination are addrspace(3)
; pointers; both are only 1-byte aligned, non-volatile.
; Expected code: five ds_read instructions covering 1 + 2 + 4 + 8 + 16
; bytes are issued before any store, then the matching five ds_write
; instructions follow in the same order.
2804define void @memmove_p3_p3_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
2805; CHECK-LABEL: memmove_p3_p3_sz31_align_1_1:
2806; CHECK:       ; %bb.0: ; %entry
2807; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2808; CHECK-NEXT:    ds_read_u8 v7, v1 offset:30
2809; CHECK-NEXT:    ds_read_u16 v8, v1 offset:28
2810; CHECK-NEXT:    ds_read_b32 v9, v1 offset:24
2811; CHECK-NEXT:    ds_read_b64 v[5:6], v1 offset:16
2812; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
2813; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2814; CHECK-NEXT:    ds_write_b8 v0, v7 offset:30
2815; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2816; CHECK-NEXT:    ds_write_b16 v0, v8 offset:28
2817; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2818; CHECK-NEXT:    ds_write_b32 v0, v9 offset:24
2819; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2820; CHECK-NEXT:    ds_write_b64 v0, v[5:6] offset:16
2821; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2822; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2823; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2824; CHECK-NEXT:    s_setpc_b64 s[30:31]
2825entry:
2826  tail call void @llvm.memmove.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
2827  ret void
2828}
2829
; memmove of 32 bytes where both source and destination are addrspace(3)
; pointers; both are only 1-byte aligned, non-volatile.
; Expected code: two ds_read2_b64 (upper half first) are issued before any
; store, then each 16-byte half is written with one ds_write2_b64.
2830define void @memmove_p3_p3_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
2831; CHECK-LABEL: memmove_p3_p3_sz32_align_1_1:
2832; CHECK:       ; %bb.0: ; %entry
2833; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2834; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
2835; CHECK-NEXT:    ds_read2_b64 v[6:9], v1 offset1:1
2836; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2837; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
2838; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2839; CHECK-NEXT:    ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
2840; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2841; CHECK-NEXT:    s_setpc_b64 s[30:31]
2842entry:
2843  tail call void @llvm.memmove.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
2844  ret void
2845}
2846
; memmove of 16 bytes where both source and destination are addrspace(3)
; pointers; both are 2-byte aligned, non-volatile.
; Expected code: one ds_read2_b64 followed by one ds_write2_b64, with an
; lgkmcnt wait between them.
2847define void @memmove_p3_p3_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
2848; CHECK-LABEL: memmove_p3_p3_sz16_align_2_2:
2849; CHECK:       ; %bb.0: ; %entry
2850; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2851; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
2852; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2853; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2854; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2855; CHECK-NEXT:    s_setpc_b64 s[30:31]
2856entry:
2857  tail call void @llvm.memmove.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
2858  ret void
2859}
2860
; memmove of 31 bytes where both source and destination are addrspace(3)
; pointers; both are 2-byte aligned, non-volatile.
; Expected code: five ds_read instructions covering 1 + 2 + 4 + 8 + 16
; bytes are issued before any store, then the matching five ds_write
; instructions follow in the same order.
2861define void @memmove_p3_p3_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
2862; CHECK-LABEL: memmove_p3_p3_sz31_align_2_2:
2863; CHECK:       ; %bb.0: ; %entry
2864; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2865; CHECK-NEXT:    ds_read_u8 v7, v1 offset:30
2866; CHECK-NEXT:    ds_read_u16 v8, v1 offset:28
2867; CHECK-NEXT:    ds_read_b32 v9, v1 offset:24
2868; CHECK-NEXT:    ds_read_b64 v[5:6], v1 offset:16
2869; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
2870; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2871; CHECK-NEXT:    ds_write_b8 v0, v7 offset:30
2872; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2873; CHECK-NEXT:    ds_write_b16 v0, v8 offset:28
2874; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2875; CHECK-NEXT:    ds_write_b32 v0, v9 offset:24
2876; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2877; CHECK-NEXT:    ds_write_b64 v0, v[5:6] offset:16
2878; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2879; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2880; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2881; CHECK-NEXT:    s_setpc_b64 s[30:31]
2882entry:
2883  tail call void @llvm.memmove.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
2884  ret void
2885}
2886
; memmove of 32 bytes where both source and destination are addrspace(3)
; pointers; both are 2-byte aligned, non-volatile.
; Expected code: two ds_read2_b64 (upper half first) are issued before any
; store, then each 16-byte half is written with one ds_write2_b64.
2887define void @memmove_p3_p3_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
2888; CHECK-LABEL: memmove_p3_p3_sz32_align_2_2:
2889; CHECK:       ; %bb.0: ; %entry
2890; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2891; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
2892; CHECK-NEXT:    ds_read2_b64 v[6:9], v1 offset1:1
2893; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2894; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
2895; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2896; CHECK-NEXT:    ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
2897; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2898; CHECK-NEXT:    s_setpc_b64 s[30:31]
2899entry:
2900  tail call void @llvm.memmove.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
2901  ret void
2902}
2903
; memmove of 16 bytes where both source and destination are addrspace(3)
; pointers; both are 8-byte aligned, non-volatile.
; Expected code: one ds_read2_b64 followed by one ds_write2_b64, with an
; lgkmcnt wait between them.
2904define void @memmove_p3_p3_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
2905; CHECK-LABEL: memmove_p3_p3_sz16_align_8_8:
2906; CHECK:       ; %bb.0: ; %entry
2907; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2908; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
2909; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2910; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
2911; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2912; CHECK-NEXT:    s_setpc_b64 s[30:31]
2913entry:
2914  tail call void @llvm.memmove.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
2915  ret void
2916}
2917
; memmove of 31 bytes where both source and destination are addrspace(3)
; pointers; both are 8-byte aligned, non-volatile.
; Expected code: five ds_read instructions covering 16 + 8 + 4 + 1 + 2
; bytes (ds_read2_b64, ds_read2_b32, ds_read_b32, ds_read_u8, ds_read_u16)
; are issued before any store, then the matching five ds_write
; instructions follow in the same order.
2918define void @memmove_p3_p3_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
2919; CHECK-LABEL: memmove_p3_p3_sz31_align_8_8:
2920; CHECK:       ; %bb.0: ; %entry
2921; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2922; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset1:1
2923; CHECK-NEXT:    ds_read2_b32 v[6:7], v1 offset0:4 offset1:5
2924; CHECK-NEXT:    ds_read_b32 v8, v1 offset:24
2925; CHECK-NEXT:    ds_read_u8 v9, v1 offset:30
2926; CHECK-NEXT:    ds_read_u16 v1, v1 offset:28
2927; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2928; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
2929; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2930; CHECK-NEXT:    ds_write2_b32 v0, v6, v7 offset0:4 offset1:5
2931; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2932; CHECK-NEXT:    ds_write_b32 v0, v8 offset:24
2933; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2934; CHECK-NEXT:    ds_write_b8 v0, v9 offset:30
2935; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2936; CHECK-NEXT:    ds_write_b16 v0, v1 offset:28
2937; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2938; CHECK-NEXT:    s_setpc_b64 s[30:31]
2939entry:
2940  tail call void @llvm.memmove.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
2941  ret void
2942}
2943
; memmove of 32 bytes where both source and destination are addrspace(3)
; pointers; both are 8-byte aligned, non-volatile.
; Expected code: two ds_read2_b64 (upper half first) are issued before any
; store, then each 16-byte half is written with one ds_write2_b64.
2944define void @memmove_p3_p3_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
2945; CHECK-LABEL: memmove_p3_p3_sz32_align_8_8:
2946; CHECK:       ; %bb.0: ; %entry
2947; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2948; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
2949; CHECK-NEXT:    ds_read2_b64 v[6:9], v1 offset1:1
2950; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2951; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
2952; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
2953; CHECK-NEXT:    ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
2954; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2955; CHECK-NEXT:    s_setpc_b64 s[30:31]
2956entry:
2957  tail call void @llvm.memmove.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
2958  ret void
2959}
2960
; memmove of 16 bytes where both source and destination are addrspace(3)
; pointers; both are 16-byte aligned, non-volatile.
; Expected code: one ds_read_b128 followed by one ds_write_b128, with an
; lgkmcnt wait between them.
2961define void @memmove_p3_p3_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
2962; CHECK-LABEL: memmove_p3_p3_sz16_align_16_16:
2963; CHECK:       ; %bb.0: ; %entry
2964; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2965; CHECK-NEXT:    ds_read_b128 v[1:4], v1
2966; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2967; CHECK-NEXT:    ds_write_b128 v0, v[1:4]
2968; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2969; CHECK-NEXT:    s_setpc_b64 s[30:31]
2970entry:
2971  tail call void @llvm.memmove.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
2972  ret void
2973}
2974
; memmove of 31 bytes where both source and destination are addrspace(3)
; pointers; both are 16-byte aligned, non-volatile.
; Expected code: five ds_read instructions covering 8 + 4 + 1 + 2 + 16
; bytes (ds_read2_b32, ds_read_b32, ds_read_u8, ds_read_u16, ds_read_b128)
; are issued before any store, then the matching five ds_write
; instructions follow in the same order.
2975define void @memmove_p3_p3_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
2976; CHECK-LABEL: memmove_p3_p3_sz31_align_16_16:
2977; CHECK:       ; %bb.0: ; %entry
2978; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2979; CHECK-NEXT:    ds_read2_b32 v[5:6], v1 offset0:4 offset1:5
2980; CHECK-NEXT:    ds_read_b32 v7, v1 offset:24
2981; CHECK-NEXT:    ds_read_u8 v8, v1 offset:30
2982; CHECK-NEXT:    ds_read_u16 v9, v1 offset:28
2983; CHECK-NEXT:    ds_read_b128 v[1:4], v1
2984; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2985; CHECK-NEXT:    ds_write2_b32 v0, v5, v6 offset0:4 offset1:5
2986; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2987; CHECK-NEXT:    ds_write_b32 v0, v7 offset:24
2988; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2989; CHECK-NEXT:    ds_write_b8 v0, v8 offset:30
2990; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2991; CHECK-NEXT:    ds_write_b16 v0, v9 offset:28
2992; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
2993; CHECK-NEXT:    ds_write_b128 v0, v[1:4]
2994; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
2995; CHECK-NEXT:    s_setpc_b64 s[30:31]
2996entry:
2997  tail call void @llvm.memmove.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
2998  ret void
2999}
3000
; memmove of 32 bytes where both source and destination are addrspace(3)
; pointers; both are 16-byte aligned, non-volatile.
; Expected code: two ds_read_b128 (offset 16 first) are issued before any
; store, then each 16-byte half is written with one ds_write_b128.
3001define void @memmove_p3_p3_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
3002; CHECK-LABEL: memmove_p3_p3_sz32_align_16_16:
3003; CHECK:       ; %bb.0: ; %entry
3004; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3005; CHECK-NEXT:    ds_read_b128 v[2:5], v1 offset:16
3006; CHECK-NEXT:    ds_read_b128 v[6:9], v1
3007; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
3008; CHECK-NEXT:    ds_write_b128 v0, v[2:5] offset:16
3009; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
3010; CHECK-NEXT:    ds_write_b128 v0, v[6:9]
3011; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3012; CHECK-NEXT:    s_setpc_b64 s[30:31]
3013entry:
3014  tail call void @llvm.memmove.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
3015  ret void
3016}
3017
; memmove of 16 bytes from an addrspace(4) source into an addrspace(3)
; destination; both pointers are only 1-byte aligned, non-volatile.
; Expected code: the addrspace(4) source is read with a single
; global_load_dwordx4, followed by one ds_write2_b64 that stores the two
; 8-byte halves.
3018define void @memmove_p3_p4_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
3019; CHECK-LABEL: memmove_p3_p4_sz16_align_1_1:
3020; CHECK:       ; %bb.0: ; %entry
3021; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3022; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3023; CHECK-NEXT:    s_waitcnt vmcnt(0)
3024; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
3025; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3026; CHECK-NEXT:    s_setpc_b64 s[30:31]
3027entry:
3028  tail call void @llvm.memmove.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
3029  ret void
3030}
3031
; memmove of 31 bytes from an addrspace(4) source into an addrspace(3)
; destination; both pointers are only 1-byte aligned, non-volatile.
; Expected code: four global loads covering 16 + 12 + 2 + 1 bytes are issued
; before any store. The 12-byte piece is written as ds_write_b64 plus
; ds_write_b32; the tail is written with ds_write_b16 and ds_write_b8.
3032define void @memmove_p3_p4_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
3033; CHECK-LABEL: memmove_p3_p4_sz31_align_1_1:
3034; CHECK:       ; %bb.0: ; %entry
3035; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3036; CHECK-NEXT:    s_clause 0x3
3037; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3038; CHECK-NEXT:    global_load_dwordx3 v[7:9], v[1:2], off offset:16
3039; CHECK-NEXT:    global_load_ushort v10, v[1:2], off offset:28
3040; CHECK-NEXT:    global_load_ubyte v1, v[1:2], off offset:30
3041; CHECK-NEXT:    s_waitcnt vmcnt(3)
3042; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
3043; CHECK-NEXT:    s_waitcnt vmcnt(2)
3044; CHECK-NEXT:    ds_write_b64 v0, v[7:8] offset:16
3045; CHECK-NEXT:    ds_write_b32 v0, v9 offset:24
3046; CHECK-NEXT:    s_waitcnt vmcnt(1)
3047; CHECK-NEXT:    ds_write_b16 v0, v10 offset:28
3048; CHECK-NEXT:    s_waitcnt vmcnt(0)
3049; CHECK-NEXT:    ds_write_b8 v0, v1 offset:30
3050; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3051; CHECK-NEXT:    s_setpc_b64 s[30:31]
3052entry:
3053  tail call void @llvm.memmove.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
3054  ret void
3055}
3056
; memmove of 32 bytes from an addrspace(4) source into an addrspace(3)
; destination; both pointers are only 1-byte aligned, non-volatile.
; Expected code: two global_load_dwordx4 (offsets 0 and 16) are issued
; before any store, then each 16-byte half is written with one
; ds_write2_b64.
3057define void @memmove_p3_p4_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
3058; CHECK-LABEL: memmove_p3_p4_sz32_align_1_1:
3059; CHECK:       ; %bb.0: ; %entry
3060; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3061; CHECK-NEXT:    s_clause 0x1
3062; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3063; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
3064; CHECK-NEXT:    s_waitcnt vmcnt(1)
3065; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
3066; CHECK-NEXT:    s_waitcnt vmcnt(0)
3067; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
3068; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3069; CHECK-NEXT:    s_setpc_b64 s[30:31]
3070entry:
3071  tail call void @llvm.memmove.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
3072  ret void
3073}
3074
; memmove of 16 bytes from an addrspace(4) source into an addrspace(3)
; destination; both pointers are 2-byte aligned, non-volatile.
; Expected code: the addrspace(4) source is read with a single
; global_load_dwordx4, followed by one ds_write2_b64 that stores the two
; 8-byte halves.
3075define void @memmove_p3_p4_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
3076; CHECK-LABEL: memmove_p3_p4_sz16_align_2_2:
3077; CHECK:       ; %bb.0: ; %entry
3078; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3079; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3080; CHECK-NEXT:    s_waitcnt vmcnt(0)
3081; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
3082; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3083; CHECK-NEXT:    s_setpc_b64 s[30:31]
3084entry:
3085  tail call void @llvm.memmove.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
3086  ret void
3087}
3088
; memmove of 31 bytes from addrspace(4) to addrspace(3), both pointers align 2.
; Expected lowering: the source is read as dwordx4 + dwordx3 + ushort + ubyte
; (all four loads in one clause, before any store); the destination is written
; with ds_write2_b64, ds_write_b64, ds_write_b32, ds_write_b16 and ds_write_b8.
3089define void @memmove_p3_p4_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
3090; CHECK-LABEL: memmove_p3_p4_sz31_align_2_2:
3091; CHECK:       ; %bb.0: ; %entry
3092; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3093; CHECK-NEXT:    s_clause 0x3
3094; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3095; CHECK-NEXT:    global_load_dwordx3 v[7:9], v[1:2], off offset:16
3096; CHECK-NEXT:    global_load_ushort v10, v[1:2], off offset:28
3097; CHECK-NEXT:    global_load_ubyte v1, v[1:2], off offset:30
3098; CHECK-NEXT:    s_waitcnt vmcnt(3)
3099; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
3100; CHECK-NEXT:    s_waitcnt vmcnt(2)
3101; CHECK-NEXT:    ds_write_b64 v0, v[7:8] offset:16
3102; CHECK-NEXT:    ds_write_b32 v0, v9 offset:24
3103; CHECK-NEXT:    s_waitcnt vmcnt(1)
3104; CHECK-NEXT:    ds_write_b16 v0, v10 offset:28
3105; CHECK-NEXT:    s_waitcnt vmcnt(0)
3106; CHECK-NEXT:    ds_write_b8 v0, v1 offset:30
3107; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3108; CHECK-NEXT:    s_setpc_b64 s[30:31]
3109entry:
3110  tail call void @llvm.memmove.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
3111  ret void
3112}
3113
; memmove of 32 bytes from addrspace(4) to addrspace(3), both pointers align 2.
; Expected lowering: two global_load_dwordx4 issued in one clause, then one
; ds_write2_b64 per 16-byte half, each placed after an s_waitcnt on vmcnt.
3114define void @memmove_p3_p4_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
3115; CHECK-LABEL: memmove_p3_p4_sz32_align_2_2:
3116; CHECK:       ; %bb.0: ; %entry
3117; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3118; CHECK-NEXT:    s_clause 0x1
3119; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3120; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
3121; CHECK-NEXT:    s_waitcnt vmcnt(1)
3122; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
3123; CHECK-NEXT:    s_waitcnt vmcnt(0)
3124; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
3125; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3126; CHECK-NEXT:    s_setpc_b64 s[30:31]
3127entry:
3128  tail call void @llvm.memmove.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
3129  ret void
3130}
3131
; memmove of 16 bytes from addrspace(4) to addrspace(3), both pointers align 8.
; Expected lowering: a single global_load_dwordx4 followed by one ds_write2_b64.
3132define void @memmove_p3_p4_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
3133; CHECK-LABEL: memmove_p3_p4_sz16_align_8_8:
3134; CHECK:       ; %bb.0: ; %entry
3135; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3136; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3137; CHECK-NEXT:    s_waitcnt vmcnt(0)
3138; CHECK-NEXT:    ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
3139; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3140; CHECK-NEXT:    s_setpc_b64 s[30:31]
3141entry:
3142  tail call void @llvm.memmove.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
3143  ret void
3144}
3145
; memmove of 31 bytes from addrspace(4) to addrspace(3), both pointers align 8.
; Expected lowering: the source is read as dwordx4 + dwordx3 + ushort + ubyte
; (all four loads in one clause, before any store). The three dwords loaded
; from offset 16 are stored as a ds_write2_b32 (offset0:5 offset1:6) plus a
; ds_write_b32 at offset 16; the tail uses ds_write_b16 and ds_write_b8.
3146define void @memmove_p3_p4_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
3147; CHECK-LABEL: memmove_p3_p4_sz31_align_8_8:
3148; CHECK:       ; %bb.0: ; %entry
3149; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3150; CHECK-NEXT:    s_clause 0x3
3151; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3152; CHECK-NEXT:    global_load_dwordx3 v[7:9], v[1:2], off offset:16
3153; CHECK-NEXT:    global_load_ushort v10, v[1:2], off offset:28
3154; CHECK-NEXT:    global_load_ubyte v1, v[1:2], off offset:30
3155; CHECK-NEXT:    s_waitcnt vmcnt(3)
3156; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
3157; CHECK-NEXT:    s_waitcnt vmcnt(2)
3158; CHECK-NEXT:    ds_write2_b32 v0, v8, v9 offset0:5 offset1:6
3159; CHECK-NEXT:    ds_write_b32 v0, v7 offset:16
3160; CHECK-NEXT:    s_waitcnt vmcnt(1)
3161; CHECK-NEXT:    ds_write_b16 v0, v10 offset:28
3162; CHECK-NEXT:    s_waitcnt vmcnt(0)
3163; CHECK-NEXT:    ds_write_b8 v0, v1 offset:30
3164; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3165; CHECK-NEXT:    s_setpc_b64 s[30:31]
3166entry:
3167  tail call void @llvm.memmove.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
3168  ret void
3169}
3170
; memmove of 32 bytes from addrspace(4) to addrspace(3), both pointers align 8.
; Expected lowering: two global_load_dwordx4 issued in one clause, then one
; ds_write2_b64 per 16-byte half, each placed after an s_waitcnt on vmcnt.
3171define void @memmove_p3_p4_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
3172; CHECK-LABEL: memmove_p3_p4_sz32_align_8_8:
3173; CHECK:       ; %bb.0: ; %entry
3174; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3175; CHECK-NEXT:    s_clause 0x1
3176; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3177; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
3178; CHECK-NEXT:    s_waitcnt vmcnt(1)
3179; CHECK-NEXT:    ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
3180; CHECK-NEXT:    s_waitcnt vmcnt(0)
3181; CHECK-NEXT:    ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
3182; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3183; CHECK-NEXT:    s_setpc_b64 s[30:31]
3184entry:
3185  tail call void @llvm.memmove.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
3186  ret void
3187}
3188
; memmove of 16 bytes from addrspace(4) to addrspace(3), both pointers align 16.
; Expected lowering: a single global_load_dwordx4 followed by one ds_write_b128.
3189define void @memmove_p3_p4_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
3190; CHECK-LABEL: memmove_p3_p4_sz16_align_16_16:
3191; CHECK:       ; %bb.0: ; %entry
3192; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3193; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3194; CHECK-NEXT:    s_waitcnt vmcnt(0)
3195; CHECK-NEXT:    ds_write_b128 v0, v[1:4]
3196; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3197; CHECK-NEXT:    s_setpc_b64 s[30:31]
3198entry:
3199  tail call void @llvm.memmove.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
3200  ret void
3201}
3202
; memmove of 31 bytes from addrspace(4) to addrspace(3), both pointers align 16.
; Expected lowering: four loads in one clause, with the dwordx3 at offset 16
; issued ahead of the dwordx4 at offset 0. The first 16 bytes are stored with
; ds_write_b128; the rest uses ds_write2_b32, ds_write_b32, ds_write_b16 and
; ds_write_b8.
3203define void @memmove_p3_p4_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
3204; CHECK-LABEL: memmove_p3_p4_sz31_align_16_16:
3205; CHECK:       ; %bb.0: ; %entry
3206; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3207; CHECK-NEXT:    s_clause 0x3
3208; CHECK-NEXT:    global_load_dwordx3 v[7:9], v[1:2], off offset:16
3209; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3210; CHECK-NEXT:    global_load_ushort v10, v[1:2], off offset:28
3211; CHECK-NEXT:    global_load_ubyte v1, v[1:2], off offset:30
3212; CHECK-NEXT:    s_waitcnt vmcnt(3)
3213; CHECK-NEXT:    ds_write2_b32 v0, v8, v9 offset0:5 offset1:6
3214; CHECK-NEXT:    s_waitcnt vmcnt(2)
3215; CHECK-NEXT:    ds_write_b128 v0, v[3:6]
3216; CHECK-NEXT:    ds_write_b32 v0, v7 offset:16
3217; CHECK-NEXT:    s_waitcnt vmcnt(1)
3218; CHECK-NEXT:    ds_write_b16 v0, v10 offset:28
3219; CHECK-NEXT:    s_waitcnt vmcnt(0)
3220; CHECK-NEXT:    ds_write_b8 v0, v1 offset:30
3221; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3222; CHECK-NEXT:    s_setpc_b64 s[30:31]
3223entry:
3224  tail call void @llvm.memmove.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
3225  ret void
3226}
3227
; memmove of 32 bytes from addrspace(4) to addrspace(3), both pointers align 16.
; Expected lowering: two global_load_dwordx4 issued in one clause, then one
; ds_write_b128 per 16-byte half, each placed after an s_waitcnt on vmcnt.
3228define void @memmove_p3_p4_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
3229; CHECK-LABEL: memmove_p3_p4_sz32_align_16_16:
3230; CHECK:       ; %bb.0: ; %entry
3231; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3232; CHECK-NEXT:    s_clause 0x1
3233; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3234; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
3235; CHECK-NEXT:    s_waitcnt vmcnt(1)
3236; CHECK-NEXT:    ds_write_b128 v0, v[3:6]
3237; CHECK-NEXT:    s_waitcnt vmcnt(0)
3238; CHECK-NEXT:    ds_write_b128 v0, v[7:10] offset:16
3239; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3240; CHECK-NEXT:    s_setpc_b64 s[30:31]
3241entry:
3242  tail call void @llvm.memmove.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
3243  ret void
3244}
3245
; memmove of 16 bytes from addrspace(5) to addrspace(3), both pointers align 1.
; Expected lowering: four buffer_load_dword in one clause, then a single
; ds_write2_b64 after waiting for all of them.
3246define void @memmove_p3_p5_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
3247; CHECK-LABEL: memmove_p3_p5_sz16_align_1_1:
3248; CHECK:       ; %bb.0: ; %entry
3249; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3250; CHECK-NEXT:    s_clause 0x3
3251; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3252; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3253; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3254; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3255; CHECK-NEXT:    s_waitcnt vmcnt(0)
3256; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3257; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3258; CHECK-NEXT:    s_setpc_b64 s[30:31]
3259entry:
3260  tail call void @llvm.memmove.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
3261  ret void
3262}
3263
; memmove of 31 bytes from addrspace(5) to addrspace(3), both pointers align 1.
; Expected lowering: nine buffer loads in one clause (seven dwords, one ushort,
; one ubyte), with the pieces at offsets 24, 28 and 30 loaded and stored first;
; the first 24 bytes are then written with ds_write2_b64 and ds_write_b64.
3264define void @memmove_p3_p5_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
3265; CHECK-LABEL: memmove_p3_p5_sz31_align_1_1:
3266; CHECK:       ; %bb.0: ; %entry
3267; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3268; CHECK-NEXT:    s_clause 0x8
3269; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3270; CHECK-NEXT:    buffer_load_ushort v9, v1, s[0:3], 0 offen offset:28
3271; CHECK-NEXT:    buffer_load_ubyte v10, v1, s[0:3], 0 offen offset:30
3272; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3273; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3274; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3275; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3276; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3277; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3278; CHECK-NEXT:    s_waitcnt vmcnt(8)
3279; CHECK-NEXT:    ds_write_b32 v0, v8 offset:24
3280; CHECK-NEXT:    s_waitcnt vmcnt(7)
3281; CHECK-NEXT:    ds_write_b16 v0, v9 offset:28
3282; CHECK-NEXT:    s_waitcnt vmcnt(6)
3283; CHECK-NEXT:    ds_write_b8 v0, v10 offset:30
3284; CHECK-NEXT:    s_waitcnt vmcnt(2)
3285; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3286; CHECK-NEXT:    s_waitcnt vmcnt(0)
3287; CHECK-NEXT:    ds_write_b64 v0, v[6:7] offset:16
3288; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3289; CHECK-NEXT:    s_setpc_b64 s[30:31]
3290entry:
3291  tail call void @llvm.memmove.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
3292  ret void
3293}
3294
; memmove of 32 bytes from addrspace(5) to addrspace(3), both pointers align 1.
; Expected lowering: eight buffer_load_dword in one clause, then one
; ds_write2_b64 per 16-byte half (after vmcnt(4) and vmcnt(0) respectively).
3295define void @memmove_p3_p5_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
3296; CHECK-LABEL: memmove_p3_p5_sz32_align_1_1:
3297; CHECK:       ; %bb.0: ; %entry
3298; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3299; CHECK-NEXT:    s_clause 0x7
3300; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3301; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3302; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3303; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3304; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3305; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3306; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3307; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
3308; CHECK-NEXT:    s_waitcnt vmcnt(4)
3309; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3310; CHECK-NEXT:    s_waitcnt vmcnt(0)
3311; CHECK-NEXT:    ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
3312; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3313; CHECK-NEXT:    s_setpc_b64 s[30:31]
3314entry:
3315  tail call void @llvm.memmove.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
3316  ret void
3317}
3318
; memmove of 16 bytes from addrspace(5) to addrspace(3), both pointers align 2.
; Expected lowering: four buffer_load_dword in one clause, then a single
; ds_write2_b64 after waiting for all of them.
3319define void @memmove_p3_p5_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
3320; CHECK-LABEL: memmove_p3_p5_sz16_align_2_2:
3321; CHECK:       ; %bb.0: ; %entry
3322; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3323; CHECK-NEXT:    s_clause 0x3
3324; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3325; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3326; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3327; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3328; CHECK-NEXT:    s_waitcnt vmcnt(0)
3329; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3330; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3331; CHECK-NEXT:    s_setpc_b64 s[30:31]
3332entry:
3333  tail call void @llvm.memmove.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
3334  ret void
3335}
3336
; memmove of 31 bytes from addrspace(5) to addrspace(3), both pointers align 2.
; Expected lowering: nine buffer loads in one clause (seven dwords, one ushort,
; one ubyte), with the pieces at offsets 24, 28 and 30 loaded and stored first;
; the first 24 bytes are then written with ds_write2_b64 and ds_write_b64.
3337define void @memmove_p3_p5_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
3338; CHECK-LABEL: memmove_p3_p5_sz31_align_2_2:
3339; CHECK:       ; %bb.0: ; %entry
3340; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3341; CHECK-NEXT:    s_clause 0x8
3342; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3343; CHECK-NEXT:    buffer_load_ushort v9, v1, s[0:3], 0 offen offset:28
3344; CHECK-NEXT:    buffer_load_ubyte v10, v1, s[0:3], 0 offen offset:30
3345; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3346; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3347; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3348; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3349; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3350; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3351; CHECK-NEXT:    s_waitcnt vmcnt(8)
3352; CHECK-NEXT:    ds_write_b32 v0, v8 offset:24
3353; CHECK-NEXT:    s_waitcnt vmcnt(7)
3354; CHECK-NEXT:    ds_write_b16 v0, v9 offset:28
3355; CHECK-NEXT:    s_waitcnt vmcnt(6)
3356; CHECK-NEXT:    ds_write_b8 v0, v10 offset:30
3357; CHECK-NEXT:    s_waitcnt vmcnt(2)
3358; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3359; CHECK-NEXT:    s_waitcnt vmcnt(0)
3360; CHECK-NEXT:    ds_write_b64 v0, v[6:7] offset:16
3361; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3362; CHECK-NEXT:    s_setpc_b64 s[30:31]
3363entry:
3364  tail call void @llvm.memmove.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
3365  ret void
3366}
3367
; memmove of 32 bytes from addrspace(5) to addrspace(3), both pointers align 2.
; Expected lowering: eight buffer_load_dword in one clause, then one
; ds_write2_b64 per 16-byte half (after vmcnt(4) and vmcnt(0) respectively).
3368define void @memmove_p3_p5_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
3369; CHECK-LABEL: memmove_p3_p5_sz32_align_2_2:
3370; CHECK:       ; %bb.0: ; %entry
3371; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3372; CHECK-NEXT:    s_clause 0x7
3373; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3374; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3375; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3376; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3377; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3378; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3379; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3380; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
3381; CHECK-NEXT:    s_waitcnt vmcnt(4)
3382; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3383; CHECK-NEXT:    s_waitcnt vmcnt(0)
3384; CHECK-NEXT:    ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
3385; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3386; CHECK-NEXT:    s_setpc_b64 s[30:31]
3387entry:
3388  tail call void @llvm.memmove.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
3389  ret void
3390}
3391
; memmove of 16 bytes from addrspace(5) to addrspace(3), both pointers align 8.
; Expected lowering: four buffer_load_dword in one clause, then a single
; ds_write2_b64 after waiting for all of them.
3392define void @memmove_p3_p5_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
3393; CHECK-LABEL: memmove_p3_p5_sz16_align_8_8:
3394; CHECK:       ; %bb.0: ; %entry
3395; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3396; CHECK-NEXT:    s_clause 0x3
3397; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3398; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3399; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3400; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3401; CHECK-NEXT:    s_waitcnt vmcnt(0)
3402; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3403; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3404; CHECK-NEXT:    s_setpc_b64 s[30:31]
3405entry:
3406  tail call void @llvm.memmove.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
3407  ret void
3408}
3409
; memmove of 31 bytes from addrspace(5) to addrspace(3), both pointers align 8.
; Expected lowering: nine buffer loads in one clause (seven dwords, one ushort,
; one ubyte); the final ubyte load reuses v1, the register holding the source
; offset. Stores: ds_write2_b64 for the first 16 bytes, ds_write2_b32
; (offset0:5 offset1:6) plus ds_write_b32 at offset 16, then ds_write_b16 and
; ds_write_b8 for the tail.
3410define void @memmove_p3_p5_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
3411; CHECK-LABEL: memmove_p3_p5_sz31_align_8_8:
3412; CHECK:       ; %bb.0: ; %entry
3413; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3414; CHECK-NEXT:    s_clause 0x8
3415; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3416; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3417; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3418; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3419; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:20
3420; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:24
3421; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:16
3422; CHECK-NEXT:    buffer_load_ushort v9, v1, s[0:3], 0 offen offset:28
3423; CHECK-NEXT:    buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30
3424; CHECK-NEXT:    s_waitcnt vmcnt(5)
3425; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3426; CHECK-NEXT:    s_waitcnt vmcnt(3)
3427; CHECK-NEXT:    ds_write2_b32 v0, v6, v7 offset0:5 offset1:6
3428; CHECK-NEXT:    s_waitcnt vmcnt(2)
3429; CHECK-NEXT:    ds_write_b32 v0, v8 offset:16
3430; CHECK-NEXT:    s_waitcnt vmcnt(1)
3431; CHECK-NEXT:    ds_write_b16 v0, v9 offset:28
3432; CHECK-NEXT:    s_waitcnt vmcnt(0)
3433; CHECK-NEXT:    ds_write_b8 v0, v1 offset:30
3434; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3435; CHECK-NEXT:    s_setpc_b64 s[30:31]
3436entry:
3437  tail call void @llvm.memmove.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
3438  ret void
3439}
3440
; memmove of 32 bytes from addrspace(5) to addrspace(3), both pointers align 8.
; Expected lowering: eight buffer_load_dword in one clause, then one
; ds_write2_b64 per 16-byte half (after vmcnt(4) and vmcnt(0) respectively).
3441define void @memmove_p3_p5_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
3442; CHECK-LABEL: memmove_p3_p5_sz32_align_8_8:
3443; CHECK:       ; %bb.0: ; %entry
3444; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3445; CHECK-NEXT:    s_clause 0x7
3446; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3447; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3448; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3449; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3450; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3451; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3452; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3453; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
3454; CHECK-NEXT:    s_waitcnt vmcnt(4)
3455; CHECK-NEXT:    ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
3456; CHECK-NEXT:    s_waitcnt vmcnt(0)
3457; CHECK-NEXT:    ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
3458; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3459; CHECK-NEXT:    s_setpc_b64 s[30:31]
3460entry:
3461  tail call void @llvm.memmove.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
3462  ret void
3463}
3464
; memmove of 16 bytes from addrspace(5) to addrspace(3), both pointers align 16.
; Expected lowering: four buffer_load_dword in one clause, then a single
; ds_write_b128 after waiting for all of them.
3465define void @memmove_p3_p5_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
3466; CHECK-LABEL: memmove_p3_p5_sz16_align_16_16:
3467; CHECK:       ; %bb.0: ; %entry
3468; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3469; CHECK-NEXT:    s_clause 0x3
3470; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3471; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3472; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3473; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3474; CHECK-NEXT:    s_waitcnt vmcnt(0)
3475; CHECK-NEXT:    ds_write_b128 v0, v[2:5]
3476; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3477; CHECK-NEXT:    s_setpc_b64 s[30:31]
3478entry:
3479  tail call void @llvm.memmove.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
3480  ret void
3481}
3482
; memmove of 31 bytes from addrspace(5) to addrspace(3), both pointers align 16.
; Expected lowering: nine buffer loads in one clause, with the pieces at
; offsets 16..30 loaded and stored first (ds_write2_b32, ds_write_b32,
; ds_write_b16, ds_write_b8); the first 16 bytes are written last with a
; single ds_write_b128.
3483define void @memmove_p3_p5_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
3484; CHECK-LABEL: memmove_p3_p5_sz31_align_16_16:
3485; CHECK:       ; %bb.0: ; %entry
3486; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3487; CHECK-NEXT:    s_clause 0x8
3488; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3489; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3490; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3491; CHECK-NEXT:    buffer_load_ushort v9, v1, s[0:3], 0 offen offset:28
3492; CHECK-NEXT:    buffer_load_ubyte v10, v1, s[0:3], 0 offen offset:30
3493; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3494; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3495; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3496; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3497; CHECK-NEXT:    s_waitcnt vmcnt(6)
3498; CHECK-NEXT:    ds_write2_b32 v0, v7, v8 offset0:5 offset1:6
3499; CHECK-NEXT:    ds_write_b32 v0, v6 offset:16
3500; CHECK-NEXT:    s_waitcnt vmcnt(5)
3501; CHECK-NEXT:    ds_write_b16 v0, v9 offset:28
3502; CHECK-NEXT:    s_waitcnt vmcnt(4)
3503; CHECK-NEXT:    ds_write_b8 v0, v10 offset:30
3504; CHECK-NEXT:    s_waitcnt vmcnt(0)
3505; CHECK-NEXT:    ds_write_b128 v0, v[2:5]
3506; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3507; CHECK-NEXT:    s_setpc_b64 s[30:31]
3508entry:
3509  tail call void @llvm.memmove.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
3510  ret void
3511}
3512
; memmove of 32 bytes from addrspace(5) to addrspace(3), both pointers align 16.
; Expected lowering: eight buffer_load_dword in one clause, then one
; ds_write_b128 per 16-byte half (after vmcnt(4) and vmcnt(0) respectively).
3513define void @memmove_p3_p5_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
3514; CHECK-LABEL: memmove_p3_p5_sz32_align_16_16:
3515; CHECK:       ; %bb.0: ; %entry
3516; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3517; CHECK-NEXT:    s_clause 0x7
3518; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
3519; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
3520; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
3521; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
3522; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
3523; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
3524; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
3525; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
3526; CHECK-NEXT:    s_waitcnt vmcnt(4)
3527; CHECK-NEXT:    ds_write_b128 v0, v[2:5]
3528; CHECK-NEXT:    s_waitcnt vmcnt(0)
3529; CHECK-NEXT:    ds_write_b128 v0, v[6:9] offset:16
3530; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
3531; CHECK-NEXT:    s_setpc_b64 s[30:31]
3532entry:
3533  tail call void @llvm.memmove.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
3534  ret void
3535}
3536
; memmove of 16 bytes from addrspace(0) to addrspace(5), both pointers align 1.
; Expected lowering: one flat_load_dwordx4, then four buffer_store_dword
; emitted from the highest offset (12) down to 0.
3537define void @memmove_p5_p0_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
3538; CHECK-LABEL: memmove_p5_p0_sz16_align_1_1:
3539; CHECK:       ; %bb.0: ; %entry
3540; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3541; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
3542; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3543; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3544; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3545; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3546; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3547; CHECK-NEXT:    s_setpc_b64 s[30:31]
3548entry:
3549  tail call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
3550  ret void
3551}
3552
; memmove of 31 bytes from addrspace(0) to addrspace(5), both pointers align 1.
; Expected lowering: four flat loads in one clause (ubyte at 30, ushort at 28,
; dwordx3 at 16, dwordx4 at 0), all issued before any store; the data is then
; written with buffer_store_byte, buffer_store_short and seven
; buffer_store_dword, from the highest offset down to 0.
3553define void @memmove_p5_p0_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
3554; CHECK-LABEL: memmove_p5_p0_sz31_align_1_1:
3555; CHECK:       ; %bb.0: ; %entry
3556; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3557; CHECK-NEXT:    s_clause 0x3
3558; CHECK-NEXT:    flat_load_ubyte v8, v[1:2] offset:30
3559; CHECK-NEXT:    flat_load_ushort v9, v[1:2] offset:28
3560; CHECK-NEXT:    flat_load_dwordx3 v[5:7], v[1:2] offset:16
3561; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
3562; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
3563; CHECK-NEXT:    buffer_store_byte v8, v0, s[0:3], 0 offen offset:30
3564; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
3565; CHECK-NEXT:    buffer_store_short v9, v0, s[0:3], 0 offen offset:28
3566; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3567; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
3568; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
3569; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
3570; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3571; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3572; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3573; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3574; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3575; CHECK-NEXT:    s_setpc_b64 s[30:31]
3576entry:
3577  tail call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
3578  ret void
3579}
3580
; memmove of 32 bytes from addrspace(0) to addrspace(5), both pointers align 1.
; Expected lowering: two flat_load_dwordx4 in one clause (offset 16 first),
; then eight buffer_store_dword from offset 28 down to 0.
3581define void @memmove_p5_p0_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
3582; CHECK-LABEL: memmove_p5_p0_sz32_align_1_1:
3583; CHECK:       ; %bb.0: ; %entry
3584; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3585; CHECK-NEXT:    s_clause 0x1
3586; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
3587; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
3588; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3589; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
3590; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
3591; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
3592; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
3593; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3594; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3595; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3596; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3597; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen
3598; CHECK-NEXT:    s_setpc_b64 s[30:31]
3599entry:
3600  tail call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
3601  ret void
3602}
3603
; memmove of 16 bytes from addrspace(0) to addrspace(5), both pointers align 2.
; Expected lowering: one flat_load_dwordx4, then four buffer_store_dword
; emitted from the highest offset (12) down to 0.
3604define void @memmove_p5_p0_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
3605; CHECK-LABEL: memmove_p5_p0_sz16_align_2_2:
3606; CHECK:       ; %bb.0: ; %entry
3607; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3608; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
3609; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3610; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3611; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3612; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3613; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3614; CHECK-NEXT:    s_setpc_b64 s[30:31]
3615entry:
3616  tail call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
3617  ret void
3618}
3619
; memmove of 31 bytes from addrspace(0) to addrspace(5), both pointers align 2.
; Expected lowering: four flat loads in one clause (ubyte at 30, ushort at 28,
; dwordx3 at 16, dwordx4 at 0), all issued before any store; the data is then
; written with buffer_store_byte, buffer_store_short and seven
; buffer_store_dword, from the highest offset down to 0.
3620define void @memmove_p5_p0_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
3621; CHECK-LABEL: memmove_p5_p0_sz31_align_2_2:
3622; CHECK:       ; %bb.0: ; %entry
3623; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3624; CHECK-NEXT:    s_clause 0x3
3625; CHECK-NEXT:    flat_load_ubyte v8, v[1:2] offset:30
3626; CHECK-NEXT:    flat_load_ushort v9, v[1:2] offset:28
3627; CHECK-NEXT:    flat_load_dwordx3 v[5:7], v[1:2] offset:16
3628; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
3629; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
3630; CHECK-NEXT:    buffer_store_byte v8, v0, s[0:3], 0 offen offset:30
3631; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
3632; CHECK-NEXT:    buffer_store_short v9, v0, s[0:3], 0 offen offset:28
3633; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3634; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
3635; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
3636; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
3637; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3638; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3639; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3640; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3641; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3642; CHECK-NEXT:    s_setpc_b64 s[30:31]
3643entry:
3644  tail call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
3645  ret void
3646}
3647
; memmove of 32 bytes from addrspace(0) to addrspace(5), both pointers align 2.
; Expected lowering: two flat_load_dwordx4 in one clause (offset 16 first),
; then eight buffer_store_dword from offset 28 down to 0.
3648define void @memmove_p5_p0_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
3649; CHECK-LABEL: memmove_p5_p0_sz32_align_2_2:
3650; CHECK:       ; %bb.0: ; %entry
3651; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3652; CHECK-NEXT:    s_clause 0x1
3653; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
3654; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
3655; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3656; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
3657; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
3658; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
3659; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
3660; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3661; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3662; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3663; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3664; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen
3665; CHECK-NEXT:    s_setpc_b64 s[30:31]
3666entry:
3667  tail call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
3668  ret void
3669}
3670
; memmove of 16 bytes from an addrspace(0) source to an addrspace(5) destination, both align 8.
; Expected output: one flat_load_dwordx4, then four buffer_store_dword.
3671define void @memmove_p5_p0_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
3672; CHECK-LABEL: memmove_p5_p0_sz16_align_8_8:
3673; CHECK:       ; %bb.0: ; %entry
3674; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3675; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
3676; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3677; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3678; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3679; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3680; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3681; CHECK-NEXT:    s_setpc_b64 s[30:31]
3682entry:
3683  tail call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
3684  ret void
3685}
3686
; memmove of 31 bytes from an addrspace(0) source to an addrspace(5) destination, both align 8.
; Expected output: a five-load clause (two flat_load_dword, ubyte, ushort, dwordx4) plus a sixth
; flat_load_dword (offset:20); seven buffer_store_dword, one buffer_store_short (offset:28) and
; one buffer_store_byte (offset:30).
; NOTE(review): the offset:20 source load is expected after the offset:16 destination store.
; Confirm this ordering is acceptable for memmove if %src and %dst could overlap.
3687define void @memmove_p5_p0_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
3688; CHECK-LABEL: memmove_p5_p0_sz31_align_8_8:
3689; CHECK:       ; %bb.0: ; %entry
3690; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3691; CHECK-NEXT:    s_clause 0x4
3692; CHECK-NEXT:    flat_load_dword v7, v[1:2] offset:16
3693; CHECK-NEXT:    flat_load_dword v8, v[1:2] offset:24
3694; CHECK-NEXT:    flat_load_ubyte v9, v[1:2] offset:30
3695; CHECK-NEXT:    flat_load_ushort v10, v[1:2] offset:28
3696; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2]
3697; CHECK-NEXT:    s_waitcnt vmcnt(4) lgkmcnt(4)
3698; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
3699; CHECK-NEXT:    flat_load_dword v1, v[1:2] offset:20
3700; CHECK-NEXT:    s_waitcnt vmcnt(4) lgkmcnt(4)
3701; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
3702; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(3)
3703; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
3704; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
3705; CHECK-NEXT:    buffer_store_short v10, v0, s[0:3], 0 offen offset:28
3706; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3707; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
3708; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3709; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3710; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3711; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:20
3712; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3713; CHECK-NEXT:    s_setpc_b64 s[30:31]
3714entry:
3715  tail call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
3716  ret void
3717}
3718
; memmove of 32 bytes from an addrspace(0) source to an addrspace(5) destination, both align 8.
; Expected output: two flat_load_dwordx4 in one clause, then eight buffer_store_dword.
3719define void @memmove_p5_p0_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
3720; CHECK-LABEL: memmove_p5_p0_sz32_align_8_8:
3721; CHECK:       ; %bb.0: ; %entry
3722; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3723; CHECK-NEXT:    s_clause 0x1
3724; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
3725; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
3726; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3727; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
3728; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
3729; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
3730; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
3731; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3732; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3733; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3734; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3735; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen
3736; CHECK-NEXT:    s_setpc_b64 s[30:31]
3737entry:
3738  tail call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
3739  ret void
3740}
3741
; memmove of 16 bytes from an addrspace(0) source to an addrspace(5) destination, both align 16.
; Expected output: one flat_load_dwordx4, then four buffer_store_dword.
3742define void @memmove_p5_p0_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
3743; CHECK-LABEL: memmove_p5_p0_sz16_align_16_16:
3744; CHECK:       ; %bb.0: ; %entry
3745; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3746; CHECK-NEXT:    flat_load_dwordx4 v[1:4], v[1:2]
3747; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3748; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3749; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3750; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3751; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3752; CHECK-NEXT:    s_setpc_b64 s[30:31]
3753entry:
3754  tail call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
3755  ret void
3756}
3757
; memmove of 31 bytes from an addrspace(0) source to an addrspace(5) destination, both align 16.
; Expected output: a four-load clause (ubyte, dword, ushort, dwordx4) plus two further
; flat_load_dword (offset:20 and offset:24); seven buffer_store_dword, one buffer_store_short
; (offset:28) and one buffer_store_byte (offset:30).
; NOTE(review): the offset:20 and offset:24 source loads are expected after destination stores
; have already been issued. Confirm this ordering is acceptable for memmove if %src and %dst
; could overlap.
3758define void @memmove_p5_p0_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
3759; CHECK-LABEL: memmove_p5_p0_sz31_align_16_16:
3760; CHECK:       ; %bb.0: ; %entry
3761; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3762; CHECK-NEXT:    s_clause 0x3
3763; CHECK-NEXT:    flat_load_ubyte v7, v[1:2] offset:30
3764; CHECK-NEXT:    flat_load_dword v8, v[1:2] offset:16
3765; CHECK-NEXT:    flat_load_ushort v9, v[1:2] offset:28
3766; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2]
3767; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
3768; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:16
3769; CHECK-NEXT:    flat_load_dword v8, v[1:2] offset:20
3770; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3771; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
3772; CHECK-NEXT:    flat_load_dword v1, v[1:2] offset:24
3773; CHECK-NEXT:    buffer_store_byte v7, v0, s[0:3], 0 offen offset:30
3774; CHECK-NEXT:    buffer_store_short v9, v0, s[0:3], 0 offen offset:28
3775; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
3776; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3777; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3778; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3779; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:24
3780; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3781; CHECK-NEXT:    s_setpc_b64 s[30:31]
3782entry:
3783  tail call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
3784  ret void
3785}
3786
; memmove of 32 bytes from an addrspace(0) source to an addrspace(5) destination, both align 16.
; Expected output: two flat_load_dwordx4 in one clause, then eight buffer_store_dword.
3787define void @memmove_p5_p0_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
3788; CHECK-LABEL: memmove_p5_p0_sz32_align_16_16:
3789; CHECK:       ; %bb.0: ; %entry
3790; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3791; CHECK-NEXT:    s_clause 0x1
3792; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[1:2] offset:16
3793; CHECK-NEXT:    flat_load_dwordx4 v[7:10], v[1:2]
3794; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
3795; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
3796; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
3797; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
3798; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
3799; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3800; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
3801; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
3802; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
3803; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen
3804; CHECK-NEXT:    s_setpc_b64 s[30:31]
3805entry:
3806  tail call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
3807  ret void
3808}
3809
; memmove of 16 bytes from an addrspace(1) source to an addrspace(5) destination, both align 1.
; Expected output: one global_load_dwordx4, then four buffer_store_dword.
3810define void @memmove_p5_p1_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
3811; CHECK-LABEL: memmove_p5_p1_sz16_align_1_1:
3812; CHECK:       ; %bb.0: ; %entry
3813; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3814; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3815; CHECK-NEXT:    s_waitcnt vmcnt(0)
3816; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3817; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3818; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3819; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3820; CHECK-NEXT:    s_setpc_b64 s[30:31]
3821entry:
3822  tail call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
3823  ret void
3824}
3825
; memmove of 31 bytes from an addrspace(1) source to an addrspace(5) destination, both align 1.
; Expected output: a four-load clause (global_load_dwordx3, ushort, ubyte, dwordx4); seven
; buffer_store_dword, one buffer_store_short (offset:28) and one buffer_store_byte (offset:30).
3826define void @memmove_p5_p1_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
3827; CHECK-LABEL: memmove_p5_p1_sz31_align_1_1:
3828; CHECK:       ; %bb.0: ; %entry
3829; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3830; CHECK-NEXT:    s_clause 0x3
3831; CHECK-NEXT:    global_load_dwordx3 v[5:7], v[1:2], off offset:16
3832; CHECK-NEXT:    global_load_ushort v8, v[1:2], off offset:28
3833; CHECK-NEXT:    global_load_ubyte v9, v[1:2], off offset:30
3834; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3835; CHECK-NEXT:    s_waitcnt vmcnt(3)
3836; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
3837; CHECK-NEXT:    s_waitcnt vmcnt(2)
3838; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
3839; CHECK-NEXT:    s_waitcnt vmcnt(1)
3840; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
3841; CHECK-NEXT:    s_waitcnt vmcnt(0)
3842; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3843; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3844; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3845; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3846; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
3847; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
3848; CHECK-NEXT:    s_setpc_b64 s[30:31]
3849entry:
3850  tail call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
3851  ret void
3852}
3853
; memmove of 32 bytes from an addrspace(1) source to an addrspace(5) destination, both align 1.
; Expected output: two global_load_dwordx4 in one clause, then eight buffer_store_dword.
3854define void @memmove_p5_p1_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
3855; CHECK-LABEL: memmove_p5_p1_sz32_align_1_1:
3856; CHECK:       ; %bb.0: ; %entry
3857; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3858; CHECK-NEXT:    s_clause 0x1
3859; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3860; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
3861; CHECK-NEXT:    s_waitcnt vmcnt(1)
3862; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3863; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
3864; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3865; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3866; CHECK-NEXT:    s_waitcnt vmcnt(0)
3867; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
3868; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
3869; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
3870; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
3871; CHECK-NEXT:    s_setpc_b64 s[30:31]
3872entry:
3873  tail call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
3874  ret void
3875}
3876
; memmove of 16 bytes from an addrspace(1) source to an addrspace(5) destination, both align 2.
; Expected output: one global_load_dwordx4, then four buffer_store_dword.
3877define void @memmove_p5_p1_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
3878; CHECK-LABEL: memmove_p5_p1_sz16_align_2_2:
3879; CHECK:       ; %bb.0: ; %entry
3880; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3881; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3882; CHECK-NEXT:    s_waitcnt vmcnt(0)
3883; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3884; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3885; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3886; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3887; CHECK-NEXT:    s_setpc_b64 s[30:31]
3888entry:
3889  tail call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
3890  ret void
3891}
3892
; memmove of 31 bytes from an addrspace(1) source to an addrspace(5) destination, both align 2.
; Expected output: a four-load clause (global_load_dwordx3, ushort, ubyte, dwordx4); seven
; buffer_store_dword, one buffer_store_short (offset:28) and one buffer_store_byte (offset:30).
3893define void @memmove_p5_p1_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
3894; CHECK-LABEL: memmove_p5_p1_sz31_align_2_2:
3895; CHECK:       ; %bb.0: ; %entry
3896; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3897; CHECK-NEXT:    s_clause 0x3
3898; CHECK-NEXT:    global_load_dwordx3 v[5:7], v[1:2], off offset:16
3899; CHECK-NEXT:    global_load_ushort v8, v[1:2], off offset:28
3900; CHECK-NEXT:    global_load_ubyte v9, v[1:2], off offset:30
3901; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3902; CHECK-NEXT:    s_waitcnt vmcnt(3)
3903; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
3904; CHECK-NEXT:    s_waitcnt vmcnt(2)
3905; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
3906; CHECK-NEXT:    s_waitcnt vmcnt(1)
3907; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
3908; CHECK-NEXT:    s_waitcnt vmcnt(0)
3909; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3910; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3911; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3912; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3913; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
3914; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
3915; CHECK-NEXT:    s_setpc_b64 s[30:31]
3916entry:
3917  tail call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
3918  ret void
3919}
3920
; memmove of 32 bytes from an addrspace(1) source to an addrspace(5) destination, both align 2.
; Expected output: two global_load_dwordx4 in one clause, then eight buffer_store_dword.
3921define void @memmove_p5_p1_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
3922; CHECK-LABEL: memmove_p5_p1_sz32_align_2_2:
3923; CHECK:       ; %bb.0: ; %entry
3924; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3925; CHECK-NEXT:    s_clause 0x1
3926; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3927; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
3928; CHECK-NEXT:    s_waitcnt vmcnt(1)
3929; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3930; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
3931; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3932; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3933; CHECK-NEXT:    s_waitcnt vmcnt(0)
3934; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
3935; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
3936; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
3937; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
3938; CHECK-NEXT:    s_setpc_b64 s[30:31]
3939entry:
3940  tail call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
3941  ret void
3942}
3943
; memmove of 16 bytes from an addrspace(1) source to an addrspace(5) destination, both align 8.
; Expected output: one global_load_dwordx4, then four buffer_store_dword.
3944define void @memmove_p5_p1_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
3945; CHECK-LABEL: memmove_p5_p1_sz16_align_8_8:
3946; CHECK:       ; %bb.0: ; %entry
3947; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3948; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3949; CHECK-NEXT:    s_waitcnt vmcnt(0)
3950; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3951; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3952; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3953; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3954; CHECK-NEXT:    s_setpc_b64 s[30:31]
3955entry:
3956  tail call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
3957  ret void
3958}
3959
; memmove of 31 bytes from an addrspace(1) source to an addrspace(5) destination, both align 8.
; Expected output: a four-load clause (global_load_dwordx3, ushort, ubyte, dwordx4); seven
; buffer_store_dword, one buffer_store_short (offset:28) and one buffer_store_byte (offset:30).
3960define void @memmove_p5_p1_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
3961; CHECK-LABEL: memmove_p5_p1_sz31_align_8_8:
3962; CHECK:       ; %bb.0: ; %entry
3963; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3964; CHECK-NEXT:    s_clause 0x3
3965; CHECK-NEXT:    global_load_dwordx3 v[5:7], v[1:2], off offset:16
3966; CHECK-NEXT:    global_load_ushort v8, v[1:2], off offset:28
3967; CHECK-NEXT:    global_load_ubyte v9, v[1:2], off offset:30
3968; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
3969; CHECK-NEXT:    s_waitcnt vmcnt(3)
3970; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
3971; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
3972; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
3973; CHECK-NEXT:    s_waitcnt vmcnt(2)
3974; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
3975; CHECK-NEXT:    s_waitcnt vmcnt(1)
3976; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
3977; CHECK-NEXT:    s_waitcnt vmcnt(0)
3978; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
3979; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
3980; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
3981; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
3982; CHECK-NEXT:    s_setpc_b64 s[30:31]
3983entry:
3984  tail call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
3985  ret void
3986}
3987
; memmove of 32 bytes from an addrspace(1) source to an addrspace(5) destination, both align 8.
; Expected output: two global_load_dwordx4 in one clause, then eight buffer_store_dword.
3988define void @memmove_p5_p1_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
3989; CHECK-LABEL: memmove_p5_p1_sz32_align_8_8:
3990; CHECK:       ; %bb.0: ; %entry
3991; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3992; CHECK-NEXT:    s_clause 0x1
3993; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
3994; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
3995; CHECK-NEXT:    s_waitcnt vmcnt(1)
3996; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
3997; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
3998; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
3999; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
4000; CHECK-NEXT:    s_waitcnt vmcnt(0)
4001; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4002; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4003; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4004; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4005; CHECK-NEXT:    s_setpc_b64 s[30:31]
4006entry:
4007  tail call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
4008  ret void
4009}
4010
; memmove of 16 bytes from an addrspace(1) source to an addrspace(5) destination, both align 16.
; Expected output: one global_load_dwordx4, then four buffer_store_dword.
4011define void @memmove_p5_p1_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
4012; CHECK-LABEL: memmove_p5_p1_sz16_align_16_16:
4013; CHECK:       ; %bb.0: ; %entry
4014; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4015; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4016; CHECK-NEXT:    s_waitcnt vmcnt(0)
4017; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4018; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4019; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4020; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4021; CHECK-NEXT:    s_setpc_b64 s[30:31]
4022entry:
4023  tail call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
4024  ret void
4025}
4026
; memmove of 31 bytes from an addrspace(1) source to an addrspace(5) destination, both align 16.
; Expected output: a four-load clause (global_load_dwordx3, ushort, ubyte, dwordx4); seven
; buffer_store_dword, one buffer_store_short (offset:28) and one buffer_store_byte (offset:30).
4027define void @memmove_p5_p1_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
4028; CHECK-LABEL: memmove_p5_p1_sz31_align_16_16:
4029; CHECK:       ; %bb.0: ; %entry
4030; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4031; CHECK-NEXT:    s_clause 0x3
4032; CHECK-NEXT:    global_load_dwordx3 v[5:7], v[1:2], off offset:16
4033; CHECK-NEXT:    global_load_ushort v8, v[1:2], off offset:28
4034; CHECK-NEXT:    global_load_ubyte v9, v[1:2], off offset:30
4035; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4036; CHECK-NEXT:    s_waitcnt vmcnt(3)
4037; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
4038; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
4039; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
4040; CHECK-NEXT:    s_waitcnt vmcnt(2)
4041; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
4042; CHECK-NEXT:    s_waitcnt vmcnt(1)
4043; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
4044; CHECK-NEXT:    s_waitcnt vmcnt(0)
4045; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4046; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4047; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4048; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4049; CHECK-NEXT:    s_setpc_b64 s[30:31]
4050entry:
4051  tail call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
4052  ret void
4053}
4054
; memmove of 32 bytes from an addrspace(1) source to an addrspace(5) destination, both align 16.
; Expected output: two global_load_dwordx4 in one clause, then eight buffer_store_dword.
4055define void @memmove_p5_p1_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
4056; CHECK-LABEL: memmove_p5_p1_sz32_align_16_16:
4057; CHECK:       ; %bb.0: ; %entry
4058; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4059; CHECK-NEXT:    s_clause 0x1
4060; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
4061; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
4062; CHECK-NEXT:    s_waitcnt vmcnt(1)
4063; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4064; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4065; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4066; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
4067; CHECK-NEXT:    s_waitcnt vmcnt(0)
4068; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4069; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4070; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4071; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4072; CHECK-NEXT:    s_setpc_b64 s[30:31]
4073entry:
4074  tail call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
4075  ret void
4076}
4077
; memmove of 16 bytes from an addrspace(3) source to an addrspace(5) destination, both align 1.
; Expected output: one ds_read2_b64, then four buffer_store_dword.
4078define void @memmove_p5_p3_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
4079; CHECK-LABEL: memmove_p5_p3_sz16_align_1_1:
4080; CHECK:       ; %bb.0: ; %entry
4081; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4082; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
4083; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
4084; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4085; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4086; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4087; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4088; CHECK-NEXT:    s_setpc_b64 s[30:31]
4089entry:
4090  tail call void @llvm.memmove.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
4091  ret void
4092}
4093
; memmove of 31 bytes from an addrspace(3) source to an addrspace(5) destination, both align 1.
; Expected output: five DS reads (ds_read_b32, ds_read_u16, ds_read_u8, ds_read2_b64,
; ds_read_b64); seven buffer_store_dword, one buffer_store_short (offset:28) and one
; buffer_store_byte (offset:30).
4094define void @memmove_p5_p3_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
4095; CHECK-LABEL: memmove_p5_p3_sz31_align_1_1:
4096; CHECK:       ; %bb.0: ; %entry
4097; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4098; CHECK-NEXT:    ds_read_b32 v8, v1 offset:24
4099; CHECK-NEXT:    ds_read_u16 v9, v1 offset:28
4100; CHECK-NEXT:    ds_read_u8 v10, v1 offset:30
4101; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset1:1
4102; CHECK-NEXT:    ds_read_b64 v[6:7], v1 offset:16
4103; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
4104; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
4105; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
4106; CHECK-NEXT:    buffer_store_short v9, v0, s[0:3], 0 offen offset:28
4107; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
4108; CHECK-NEXT:    buffer_store_byte v10, v0, s[0:3], 0 offen offset:30
4109; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
4110; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4111; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
4112; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
4113; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4114; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
4115; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
4116; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
4117; CHECK-NEXT:    s_setpc_b64 s[30:31]
4118entry:
4119  tail call void @llvm.memmove.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
4120  ret void
4121}
4122
; memmove of 32 bytes from an addrspace(3) source to an addrspace(5) destination, both align 1.
; Expected output: two ds_read2_b64, then eight buffer_store_dword.
4123define void @memmove_p5_p3_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
4124; CHECK-LABEL: memmove_p5_p3_sz32_align_1_1:
4125; CHECK:       ; %bb.0: ; %entry
4126; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4127; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset1:1
4128; CHECK-NEXT:    ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
4129; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
4130; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4131; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
4132; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
4133; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4134; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
4135; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
4136; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
4137; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
4138; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
4139; CHECK-NEXT:    s_setpc_b64 s[30:31]
4140entry:
4141  tail call void @llvm.memmove.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
4142  ret void
4143}
4144
; memmove of 16 bytes from an addrspace(3) source to an addrspace(5) destination, both align 2.
; Expected output: one ds_read2_b64, then four buffer_store_dword.
4145define void @memmove_p5_p3_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
4146; CHECK-LABEL: memmove_p5_p3_sz16_align_2_2:
4147; CHECK:       ; %bb.0: ; %entry
4148; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4149; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
4150; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
4151; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4152; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4153; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4154; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4155; CHECK-NEXT:    s_setpc_b64 s[30:31]
4156entry:
4157  tail call void @llvm.memmove.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
4158  ret void
4159}
4160
; memmove of 31 bytes from an addrspace(3) source to an addrspace(5) destination, both align 2.
; Expected output: five DS reads (ds_read_b32, ds_read_u16, ds_read_u8, ds_read2_b64,
; ds_read_b64); seven buffer_store_dword, one buffer_store_short (offset:28) and one
; buffer_store_byte (offset:30).
4161define void @memmove_p5_p3_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
4162; CHECK-LABEL: memmove_p5_p3_sz31_align_2_2:
4163; CHECK:       ; %bb.0: ; %entry
4164; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4165; CHECK-NEXT:    ds_read_b32 v8, v1 offset:24
4166; CHECK-NEXT:    ds_read_u16 v9, v1 offset:28
4167; CHECK-NEXT:    ds_read_u8 v10, v1 offset:30
4168; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset1:1
4169; CHECK-NEXT:    ds_read_b64 v[6:7], v1 offset:16
4170; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
4171; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
4172; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
4173; CHECK-NEXT:    buffer_store_short v9, v0, s[0:3], 0 offen offset:28
4174; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
4175; CHECK-NEXT:    buffer_store_byte v10, v0, s[0:3], 0 offen offset:30
4176; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
4177; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4178; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
4179; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
4180; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4181; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
4182; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
4183; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
4184; CHECK-NEXT:    s_setpc_b64 s[30:31]
4185entry:
4186  tail call void @llvm.memmove.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
4187  ret void
4188}
4189
; memmove of 32 bytes from an addrspace(3) source to an addrspace(5) destination, both align 2.
; Expected output: two ds_read2_b64, then eight buffer_store_dword.
4190define void @memmove_p5_p3_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
4191; CHECK-LABEL: memmove_p5_p3_sz32_align_2_2:
4192; CHECK:       ; %bb.0: ; %entry
4193; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4194; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset1:1
4195; CHECK-NEXT:    ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
4196; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
4197; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4198; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
4199; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
4200; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4201; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
4202; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
4203; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
4204; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
4205; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
4206; CHECK-NEXT:    s_setpc_b64 s[30:31]
4207entry:
4208  tail call void @llvm.memmove.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
4209  ret void
4210}
4211
; memmove of 16 bytes from an addrspace(3) source to an addrspace(5) destination, both align 8.
; Expected output: one ds_read2_b64, then four buffer_store_dword.
4212define void @memmove_p5_p3_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
4213; CHECK-LABEL: memmove_p5_p3_sz16_align_8_8:
4214; CHECK:       ; %bb.0: ; %entry
4215; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4216; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
4217; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
4218; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4219; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4220; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4221; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4222; CHECK-NEXT:    s_setpc_b64 s[30:31]
4223entry:
4224  tail call void @llvm.memmove.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
4225  ret void
4226}
4227
; 31-byte memmove from addrspace(3) to addrspace(5); both pointers are 8-byte aligned.
; Expected assembly: five ds_read instructions are issued before any store
; (ds_read2_b32, ds_read_b32, ds_read_u16, ds_read_u8, ds_read2_b64). The data
; is stored to destination offsets 16/20, 24, 28 (short), 30 (byte) and
; finally 12/8/4/0. The s_waitcnt lgkmcnt(N) values step down from 4 to 0,
; one step ahead of each group of stores.
4228define void @memmove_p5_p3_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
4229; CHECK-LABEL: memmove_p5_p3_sz31_align_8_8:
4230; CHECK:       ; %bb.0: ; %entry
4231; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4232; CHECK-NEXT:    ds_read2_b32 v[5:6], v1 offset0:4 offset1:5
4233; CHECK-NEXT:    ds_read_b32 v7, v1 offset:24
4234; CHECK-NEXT:    ds_read_u16 v8, v1 offset:28
4235; CHECK-NEXT:    ds_read_u8 v9, v1 offset:30
4236; CHECK-NEXT:    ds_read2_b64 v[1:4], v1 offset1:1
4237; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
4238; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
4239; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
4240; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
4241; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
4242; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
4243; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
4244; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
4245; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
4246; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
4247; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4248; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4249; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4250; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4251; CHECK-NEXT:    s_setpc_b64 s[30:31]
4252entry:
4253  tail call void @llvm.memmove.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
4254  ret void
4255}
4256
; 32-byte memmove from addrspace(3) to addrspace(5); both pointers are 8-byte aligned.
; Expected assembly: two ds_read2_b64 loads are issued back to back, then
; eight buffer_store_dword instructions. The first four stores (offsets
; 12..0) follow s_waitcnt lgkmcnt(1); the last four (offsets 28..16) follow
; s_waitcnt lgkmcnt(0).
4257define void @memmove_p5_p3_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
4258; CHECK-LABEL: memmove_p5_p3_sz32_align_8_8:
4259; CHECK:       ; %bb.0: ; %entry
4260; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4261; CHECK-NEXT:    ds_read2_b64 v[2:5], v1 offset1:1
4262; CHECK-NEXT:    ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
4263; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
4264; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
4265; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4266; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4267; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
4268; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
4269; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
4270; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
4271; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
4272; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
4273; CHECK-NEXT:    s_setpc_b64 s[30:31]
4274entry:
4275  tail call void @llvm.memmove.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
4276  ret void
4277}
4278
; 16-byte memmove from addrspace(3) to addrspace(5); both pointers are 16-byte aligned.
; Expected assembly: a single ds_read_b128 loads the whole 16 bytes, then,
; after s_waitcnt lgkmcnt(0), four buffer_store_dword instructions write
; destination offsets 12, 8, 4, 0.
4279define void @memmove_p5_p3_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
4280; CHECK-LABEL: memmove_p5_p3_sz16_align_16_16:
4281; CHECK:       ; %bb.0: ; %entry
4282; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4283; CHECK-NEXT:    ds_read_b128 v[1:4], v1
4284; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
4285; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4286; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4287; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4288; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4289; CHECK-NEXT:    s_setpc_b64 s[30:31]
4290entry:
4291  tail call void @llvm.memmove.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
4292  ret void
4293}
4294
; 31-byte memmove from addrspace(3) to addrspace(5); both pointers are 16-byte aligned.
; Expected assembly: five ds_read instructions are issued before any store
; (ds_read2_b32, ds_read_b32, ds_read_u16, ds_read_u8, and a ds_read_b128 for
; the first 16 bytes). Stores go to destination offsets 16/20, 24,
; 28 (short), 30 (byte) and finally 12/8/4/0, with s_waitcnt lgkmcnt(N)
; stepping down from 4 to 0 ahead of each group.
4295define void @memmove_p5_p3_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
4296; CHECK-LABEL: memmove_p5_p3_sz31_align_16_16:
4297; CHECK:       ; %bb.0: ; %entry
4298; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4299; CHECK-NEXT:    ds_read2_b32 v[5:6], v1 offset0:4 offset1:5
4300; CHECK-NEXT:    ds_read_b32 v7, v1 offset:24
4301; CHECK-NEXT:    ds_read_u16 v8, v1 offset:28
4302; CHECK-NEXT:    ds_read_u8 v9, v1 offset:30
4303; CHECK-NEXT:    ds_read_b128 v[1:4], v1
4304; CHECK-NEXT:    s_waitcnt lgkmcnt(4)
4305; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
4306; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
4307; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
4308; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
4309; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
4310; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
4311; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
4312; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
4313; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
4314; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4315; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4316; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4317; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4318; CHECK-NEXT:    s_setpc_b64 s[30:31]
4319entry:
4320  tail call void @llvm.memmove.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
4321  ret void
4322}
4323
; 32-byte memmove from addrspace(3) to addrspace(5); both pointers are 16-byte aligned.
; Expected assembly: two ds_read_b128 loads (source offsets 0 and 16) are
; issued first, then eight buffer_store_dword instructions. Stores for
; offsets 12..0 follow s_waitcnt lgkmcnt(1); stores for offsets 28..16
; follow s_waitcnt lgkmcnt(0).
4324define void @memmove_p5_p3_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
4325; CHECK-LABEL: memmove_p5_p3_sz32_align_16_16:
4326; CHECK:       ; %bb.0: ; %entry
4327; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4328; CHECK-NEXT:    ds_read_b128 v[2:5], v1
4329; CHECK-NEXT:    ds_read_b128 v[6:9], v1 offset:16
4330; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
4331; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
4332; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4333; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4334; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
4335; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
4336; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
4337; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
4338; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
4339; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
4340; CHECK-NEXT:    s_setpc_b64 s[30:31]
4341entry:
4342  tail call void @llvm.memmove.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
4343  ret void
4344}
4345
; 16-byte memmove from addrspace(4) to addrspace(5); both pointers are only 1-byte aligned.
; Expected assembly: one global_load_dwordx4 reads the whole source, then,
; after s_waitcnt vmcnt(0), four buffer_store_dword instructions write
; destination offsets 4, 0, 12, 8 (in that order).
4346define void @memmove_p5_p4_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
4347; CHECK-LABEL: memmove_p5_p4_sz16_align_1_1:
4348; CHECK:       ; %bb.0: ; %entry
4349; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4350; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4351; CHECK-NEXT:    s_waitcnt vmcnt(0)
4352; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4353; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4354; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4355; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4356; CHECK-NEXT:    s_setpc_b64 s[30:31]
4357entry:
4358  tail call void @llvm.memmove.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
4359  ret void
4360}
4361
; 31-byte memmove from addrspace(4) to addrspace(5); both pointers are only 1-byte aligned.
; Expected assembly: an s_clause 0x3 groups four global loads (dwordx3 at
; source offset 16, ushort at 28, ubyte at 30, dwordx4 at 0). The stores then
; follow decreasing s_waitcnt vmcnt(N): offset 24 after vmcnt(3), the short
; at 28 after vmcnt(2), the byte at 30 after vmcnt(1), and the remaining six
; dwords (offsets 4, 0, 12, 8, 20, 16) after vmcnt(0).
4362define void @memmove_p5_p4_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
4363; CHECK-LABEL: memmove_p5_p4_sz31_align_1_1:
4364; CHECK:       ; %bb.0: ; %entry
4365; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4366; CHECK-NEXT:    s_clause 0x3
4367; CHECK-NEXT:    global_load_dwordx3 v[5:7], v[1:2], off offset:16
4368; CHECK-NEXT:    global_load_ushort v8, v[1:2], off offset:28
4369; CHECK-NEXT:    global_load_ubyte v9, v[1:2], off offset:30
4370; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4371; CHECK-NEXT:    s_waitcnt vmcnt(3)
4372; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
4373; CHECK-NEXT:    s_waitcnt vmcnt(2)
4374; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
4375; CHECK-NEXT:    s_waitcnt vmcnt(1)
4376; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
4377; CHECK-NEXT:    s_waitcnt vmcnt(0)
4378; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4379; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4380; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4381; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4382; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
4383; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
4384; CHECK-NEXT:    s_setpc_b64 s[30:31]
4385entry:
4386  tail call void @llvm.memmove.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
4387  ret void
4388}
4389
; 32-byte memmove from addrspace(4) to addrspace(5); both pointers are only 1-byte aligned.
; Expected assembly: an s_clause 0x1 groups two global_load_dwordx4 loads
; (source offsets 0 and 16), followed by eight buffer_store_dword
; instructions. Offsets 4, 0, 12, 8 are stored after s_waitcnt vmcnt(1);
; offsets 20, 16, 28, 24 after s_waitcnt vmcnt(0).
4390define void @memmove_p5_p4_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
4391; CHECK-LABEL: memmove_p5_p4_sz32_align_1_1:
4392; CHECK:       ; %bb.0: ; %entry
4393; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4394; CHECK-NEXT:    s_clause 0x1
4395; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
4396; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
4397; CHECK-NEXT:    s_waitcnt vmcnt(1)
4398; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4399; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
4400; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4401; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4402; CHECK-NEXT:    s_waitcnt vmcnt(0)
4403; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4404; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4405; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4406; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4407; CHECK-NEXT:    s_setpc_b64 s[30:31]
4408entry:
4409  tail call void @llvm.memmove.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
4410  ret void
4411}
4412
; 16-byte memmove from addrspace(4) to addrspace(5); both pointers are 2-byte aligned.
; Expected assembly: one global_load_dwordx4 reads the whole source, then,
; after s_waitcnt vmcnt(0), four buffer_store_dword instructions write
; destination offsets 4, 0, 12, 8 (in that order).
4413define void @memmove_p5_p4_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
4414; CHECK-LABEL: memmove_p5_p4_sz16_align_2_2:
4415; CHECK:       ; %bb.0: ; %entry
4416; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4417; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4418; CHECK-NEXT:    s_waitcnt vmcnt(0)
4419; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4420; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4421; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4422; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4423; CHECK-NEXT:    s_setpc_b64 s[30:31]
4424entry:
4425  tail call void @llvm.memmove.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
4426  ret void
4427}
4428
; 31-byte memmove from addrspace(4) to addrspace(5); both pointers are 2-byte aligned.
; Expected assembly: an s_clause 0x3 groups four global loads (dwordx3 at
; source offset 16, ushort at 28, ubyte at 30, dwordx4 at 0). The stores then
; follow decreasing s_waitcnt vmcnt(N): offset 24 after vmcnt(3), the short
; at 28 after vmcnt(2), the byte at 30 after vmcnt(1), and the remaining six
; dwords (offsets 4, 0, 12, 8, 20, 16) after vmcnt(0).
4429define void @memmove_p5_p4_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
4430; CHECK-LABEL: memmove_p5_p4_sz31_align_2_2:
4431; CHECK:       ; %bb.0: ; %entry
4432; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4433; CHECK-NEXT:    s_clause 0x3
4434; CHECK-NEXT:    global_load_dwordx3 v[5:7], v[1:2], off offset:16
4435; CHECK-NEXT:    global_load_ushort v8, v[1:2], off offset:28
4436; CHECK-NEXT:    global_load_ubyte v9, v[1:2], off offset:30
4437; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4438; CHECK-NEXT:    s_waitcnt vmcnt(3)
4439; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
4440; CHECK-NEXT:    s_waitcnt vmcnt(2)
4441; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
4442; CHECK-NEXT:    s_waitcnt vmcnt(1)
4443; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
4444; CHECK-NEXT:    s_waitcnt vmcnt(0)
4445; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4446; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4447; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4448; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4449; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
4450; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
4451; CHECK-NEXT:    s_setpc_b64 s[30:31]
4452entry:
4453  tail call void @llvm.memmove.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
4454  ret void
4455}
4456
; 32-byte memmove from addrspace(4) to addrspace(5); both pointers are 2-byte aligned.
; Expected assembly: an s_clause 0x1 groups two global_load_dwordx4 loads
; (source offsets 0 and 16), followed by eight buffer_store_dword
; instructions. Offsets 4, 0, 12, 8 are stored after s_waitcnt vmcnt(1);
; offsets 20, 16, 28, 24 after s_waitcnt vmcnt(0).
4457define void @memmove_p5_p4_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
4458; CHECK-LABEL: memmove_p5_p4_sz32_align_2_2:
4459; CHECK:       ; %bb.0: ; %entry
4460; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4461; CHECK-NEXT:    s_clause 0x1
4462; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
4463; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
4464; CHECK-NEXT:    s_waitcnt vmcnt(1)
4465; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4466; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
4467; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4468; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4469; CHECK-NEXT:    s_waitcnt vmcnt(0)
4470; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4471; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4472; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4473; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4474; CHECK-NEXT:    s_setpc_b64 s[30:31]
4475entry:
4476  tail call void @llvm.memmove.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
4477  ret void
4478}
4479
; 16-byte memmove from addrspace(4) to addrspace(5); both pointers are 8-byte aligned.
; Expected assembly: one global_load_dwordx4 reads the whole source, then,
; after s_waitcnt vmcnt(0), four buffer_store_dword instructions write
; destination offsets 12, 8, 4, 0 (descending order).
4480define void @memmove_p5_p4_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
4481; CHECK-LABEL: memmove_p5_p4_sz16_align_8_8:
4482; CHECK:       ; %bb.0: ; %entry
4483; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4484; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4485; CHECK-NEXT:    s_waitcnt vmcnt(0)
4486; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4487; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4488; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4489; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4490; CHECK-NEXT:    s_setpc_b64 s[30:31]
4491entry:
4492  tail call void @llvm.memmove.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
4493  ret void
4494}
4495
; 31-byte memmove from addrspace(4) to addrspace(5); both pointers are 8-byte aligned.
; Expected assembly: an s_clause 0x3 groups four global loads (dwordx3 at
; source offset 16, ushort at 28, ubyte at 30, dwordx4 at 0). All three dwords
; of the dwordx3 result are stored (offsets 16, 20, 24) after
; s_waitcnt vmcnt(3), then the short at 28 after vmcnt(2), the byte at 30
; after vmcnt(1), and offsets 12, 8, 4, 0 after vmcnt(0).
4496define void @memmove_p5_p4_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
4497; CHECK-LABEL: memmove_p5_p4_sz31_align_8_8:
4498; CHECK:       ; %bb.0: ; %entry
4499; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4500; CHECK-NEXT:    s_clause 0x3
4501; CHECK-NEXT:    global_load_dwordx3 v[5:7], v[1:2], off offset:16
4502; CHECK-NEXT:    global_load_ushort v8, v[1:2], off offset:28
4503; CHECK-NEXT:    global_load_ubyte v9, v[1:2], off offset:30
4504; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4505; CHECK-NEXT:    s_waitcnt vmcnt(3)
4506; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
4507; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
4508; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
4509; CHECK-NEXT:    s_waitcnt vmcnt(2)
4510; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
4511; CHECK-NEXT:    s_waitcnt vmcnt(1)
4512; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
4513; CHECK-NEXT:    s_waitcnt vmcnt(0)
4514; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4515; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4516; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4517; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4518; CHECK-NEXT:    s_setpc_b64 s[30:31]
4519entry:
4520  tail call void @llvm.memmove.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
4521  ret void
4522}
4523
; 32-byte memmove from addrspace(4) to addrspace(5); both pointers are 8-byte aligned.
; Expected assembly: an s_clause 0x1 groups two global_load_dwordx4 loads
; (source offsets 0 and 16), followed by eight buffer_store_dword
; instructions. Offsets 12, 8, 4, 0 are stored after s_waitcnt vmcnt(1);
; offsets 28, 24, 20, 16 after s_waitcnt vmcnt(0).
4524define void @memmove_p5_p4_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
4525; CHECK-LABEL: memmove_p5_p4_sz32_align_8_8:
4526; CHECK:       ; %bb.0: ; %entry
4527; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4528; CHECK-NEXT:    s_clause 0x1
4529; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
4530; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
4531; CHECK-NEXT:    s_waitcnt vmcnt(1)
4532; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4533; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4534; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4535; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
4536; CHECK-NEXT:    s_waitcnt vmcnt(0)
4537; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4538; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4539; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4540; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4541; CHECK-NEXT:    s_setpc_b64 s[30:31]
4542entry:
4543  tail call void @llvm.memmove.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
4544  ret void
4545}
4546
; 16-byte memmove from addrspace(4) to addrspace(5); both pointers are 16-byte aligned.
; Expected assembly: one global_load_dwordx4 reads the whole source, then,
; after s_waitcnt vmcnt(0), four buffer_store_dword instructions write
; destination offsets 12, 8, 4, 0 (descending order).
4547define void @memmove_p5_p4_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
4548; CHECK-LABEL: memmove_p5_p4_sz16_align_16_16:
4549; CHECK:       ; %bb.0: ; %entry
4550; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4551; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4552; CHECK-NEXT:    s_waitcnt vmcnt(0)
4553; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4554; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4555; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4556; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4557; CHECK-NEXT:    s_setpc_b64 s[30:31]
4558entry:
4559  tail call void @llvm.memmove.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
4560  ret void
4561}
4562
; 31-byte memmove from addrspace(4) to addrspace(5); both pointers are 16-byte aligned.
; Expected assembly: an s_clause 0x3 groups four global loads (dwordx3 at
; source offset 16, ushort at 28, ubyte at 30, dwordx4 at 0). All three dwords
; of the dwordx3 result are stored (offsets 16, 20, 24) after
; s_waitcnt vmcnt(3), then the short at 28 after vmcnt(2), the byte at 30
; after vmcnt(1), and offsets 12, 8, 4, 0 after vmcnt(0).
4563define void @memmove_p5_p4_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
4564; CHECK-LABEL: memmove_p5_p4_sz31_align_16_16:
4565; CHECK:       ; %bb.0: ; %entry
4566; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4567; CHECK-NEXT:    s_clause 0x3
4568; CHECK-NEXT:    global_load_dwordx3 v[5:7], v[1:2], off offset:16
4569; CHECK-NEXT:    global_load_ushort v8, v[1:2], off offset:28
4570; CHECK-NEXT:    global_load_ubyte v9, v[1:2], off offset:30
4571; CHECK-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
4572; CHECK-NEXT:    s_waitcnt vmcnt(3)
4573; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
4574; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
4575; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
4576; CHECK-NEXT:    s_waitcnt vmcnt(2)
4577; CHECK-NEXT:    buffer_store_short v8, v0, s[0:3], 0 offen offset:28
4578; CHECK-NEXT:    s_waitcnt vmcnt(1)
4579; CHECK-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
4580; CHECK-NEXT:    s_waitcnt vmcnt(0)
4581; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
4582; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
4583; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
4584; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
4585; CHECK-NEXT:    s_setpc_b64 s[30:31]
4586entry:
4587  tail call void @llvm.memmove.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
4588  ret void
4589}
4590
; 32-byte memmove from addrspace(4) to addrspace(5); both pointers are 16-byte aligned.
; Expected assembly: an s_clause 0x1 groups two global_load_dwordx4 loads
; (source offsets 0 and 16), followed by eight buffer_store_dword
; instructions. Offsets 12, 8, 4, 0 are stored after s_waitcnt vmcnt(1);
; offsets 28, 24, 20, 16 after s_waitcnt vmcnt(0).
4591define void @memmove_p5_p4_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
4592; CHECK-LABEL: memmove_p5_p4_sz32_align_16_16:
4593; CHECK:       ; %bb.0: ; %entry
4594; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4595; CHECK-NEXT:    s_clause 0x1
4596; CHECK-NEXT:    global_load_dwordx4 v[3:6], v[1:2], off
4597; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[1:2], off offset:16
4598; CHECK-NEXT:    s_waitcnt vmcnt(1)
4599; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
4600; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
4601; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
4602; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
4603; CHECK-NEXT:    s_waitcnt vmcnt(0)
4604; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
4605; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
4606; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
4607; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
4608; CHECK-NEXT:    s_setpc_b64 s[30:31]
4609entry:
4610  tail call void @llvm.memmove.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
4611  ret void
4612}
4613
; 16-byte memmove within addrspace(5); both pointers are only 1-byte aligned.
; Expected assembly: an s_clause 0x3 groups four buffer_load_dword loads
; (source offsets 8, 12, 0, 4); all loads are issued before the first store.
; Each of the four buffer_store_dword instructions is preceded by its own
; s_waitcnt vmcnt(N), with N counting down from 3 to 0.
4614define void @memmove_p5_p5_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
4615; CHECK-LABEL: memmove_p5_p5_sz16_align_1_1:
4616; CHECK:       ; %bb.0: ; %entry
4617; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4618; CHECK-NEXT:    s_clause 0x3
4619; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:8
4620; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:12
4621; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen
4622; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
4623; CHECK-NEXT:    s_waitcnt vmcnt(3)
4624; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:8
4625; CHECK-NEXT:    s_waitcnt vmcnt(2)
4626; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:12
4627; CHECK-NEXT:    s_waitcnt vmcnt(1)
4628; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen
4629; CHECK-NEXT:    s_waitcnt vmcnt(0)
4630; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
4631; CHECK-NEXT:    s_setpc_b64 s[30:31]
4632entry:
4633  tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
4634  ret void
4635}
4636
; 31-byte memmove within addrspace(5); both pointers are only 1-byte aligned.
; Expected assembly: an s_clause 0x8 groups nine buffer loads: one ushort
; (source offset 28), seven dwords (offsets 24, 16, 20, 8, 12, 0, 4) and one
; ubyte (offset 30). All loads are issued before the first store. The nine
; matching stores follow in the same order, each preceded by its own
; s_waitcnt vmcnt(N), with N counting down from 8 to 0.
4637define void @memmove_p5_p5_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
4638; CHECK-LABEL: memmove_p5_p5_sz31_align_1_1:
4639; CHECK:       ; %bb.0: ; %entry
4640; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4641; CHECK-NEXT:    s_clause 0x8
4642; CHECK-NEXT:    buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28
4643; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:24
4644; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
4645; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
4646; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
4647; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
4648; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen
4649; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:4
4650; CHECK-NEXT:    buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30
4651; CHECK-NEXT:    s_waitcnt vmcnt(8)
4652; CHECK-NEXT:    buffer_store_short v2, v0, s[0:3], 0 offen offset:28
4653; CHECK-NEXT:    s_waitcnt vmcnt(7)
4654; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:24
4655; CHECK-NEXT:    s_waitcnt vmcnt(6)
4656; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
4657; CHECK-NEXT:    s_waitcnt vmcnt(5)
4658; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
4659; CHECK-NEXT:    s_waitcnt vmcnt(4)
4660; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
4661; CHECK-NEXT:    s_waitcnt vmcnt(3)
4662; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
4663; CHECK-NEXT:    s_waitcnt vmcnt(2)
4664; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen
4665; CHECK-NEXT:    s_waitcnt vmcnt(1)
4666; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:4
4667; CHECK-NEXT:    s_waitcnt vmcnt(0)
4668; CHECK-NEXT:    buffer_store_byte v1, v0, s[0:3], 0 offen offset:30
4669; CHECK-NEXT:    s_setpc_b64 s[30:31]
4670entry:
4671  tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
4672  ret void
4673}
4674
; 32-byte memmove within addrspace(5); both pointers are only 1-byte aligned.
; Expected assembly: an s_clause 0x7 groups eight buffer_load_dword loads
; (source offsets 24, 28, 16, 20, 8, 12, 0, 4); all loads are issued before
; the first store. The eight buffer_store_dword instructions follow in the
; same order, each preceded by its own s_waitcnt vmcnt(N), with N counting
; down from 7 to 0.
4675define void @memmove_p5_p5_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
4676; CHECK-LABEL: memmove_p5_p5_sz32_align_1_1:
4677; CHECK:       ; %bb.0: ; %entry
4678; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4679; CHECK-NEXT:    s_clause 0x7
4680; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:24
4681; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:28
4682; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
4683; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
4684; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
4685; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
4686; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen
4687; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
4688; CHECK-NEXT:    s_waitcnt vmcnt(7)
4689; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:24
4690; CHECK-NEXT:    s_waitcnt vmcnt(6)
4691; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:28
4692; CHECK-NEXT:    s_waitcnt vmcnt(5)
4693; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
4694; CHECK-NEXT:    s_waitcnt vmcnt(4)
4695; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
4696; CHECK-NEXT:    s_waitcnt vmcnt(3)
4697; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
4698; CHECK-NEXT:    s_waitcnt vmcnt(2)
4699; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
4700; CHECK-NEXT:    s_waitcnt vmcnt(1)
4701; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen
4702; CHECK-NEXT:    s_waitcnt vmcnt(0)
4703; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
4704; CHECK-NEXT:    s_setpc_b64 s[30:31]
4705entry:
4706  tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
4707  ret void
4708}
4709
; 16-byte memmove within addrspace(5); both pointers are 2-byte aligned.
; Expected assembly: an s_clause 0x3 groups four buffer_load_dword loads
; (source offsets 8, 12, 0, 4); all loads are issued before the first store.
; Each of the four buffer_store_dword instructions is preceded by its own
; s_waitcnt vmcnt(N), with N counting down from 3 to 0.
4710define void @memmove_p5_p5_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
4711; CHECK-LABEL: memmove_p5_p5_sz16_align_2_2:
4712; CHECK:       ; %bb.0: ; %entry
4713; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4714; CHECK-NEXT:    s_clause 0x3
4715; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:8
4716; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:12
4717; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen
4718; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
4719; CHECK-NEXT:    s_waitcnt vmcnt(3)
4720; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:8
4721; CHECK-NEXT:    s_waitcnt vmcnt(2)
4722; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:12
4723; CHECK-NEXT:    s_waitcnt vmcnt(1)
4724; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen
4725; CHECK-NEXT:    s_waitcnt vmcnt(0)
4726; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
4727; CHECK-NEXT:    s_setpc_b64 s[30:31]
4728entry:
4729  tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
4730  ret void
4731}
4732
; 31-byte memmove within addrspace(5); both pointers are 2-byte aligned.
; Expected assembly: an s_clause 0x8 groups nine buffer loads: one ushort
; (source offset 28), seven dwords (offsets 24, 16, 20, 8, 12, 0, 4) and one
; ubyte (offset 30). All loads are issued before the first store. The nine
; matching stores follow in the same order, each preceded by its own
; s_waitcnt vmcnt(N), with N counting down from 8 to 0.
4733define void @memmove_p5_p5_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
4734; CHECK-LABEL: memmove_p5_p5_sz31_align_2_2:
4735; CHECK:       ; %bb.0: ; %entry
4736; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4737; CHECK-NEXT:    s_clause 0x8
4738; CHECK-NEXT:    buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28
4739; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:24
4740; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
4741; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
4742; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
4743; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
4744; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen
4745; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:4
4746; CHECK-NEXT:    buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30
4747; CHECK-NEXT:    s_waitcnt vmcnt(8)
4748; CHECK-NEXT:    buffer_store_short v2, v0, s[0:3], 0 offen offset:28
4749; CHECK-NEXT:    s_waitcnt vmcnt(7)
4750; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:24
4751; CHECK-NEXT:    s_waitcnt vmcnt(6)
4752; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
4753; CHECK-NEXT:    s_waitcnt vmcnt(5)
4754; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
4755; CHECK-NEXT:    s_waitcnt vmcnt(4)
4756; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
4757; CHECK-NEXT:    s_waitcnt vmcnt(3)
4758; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
4759; CHECK-NEXT:    s_waitcnt vmcnt(2)
4760; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen
4761; CHECK-NEXT:    s_waitcnt vmcnt(1)
4762; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:4
4763; CHECK-NEXT:    s_waitcnt vmcnt(0)
4764; CHECK-NEXT:    buffer_store_byte v1, v0, s[0:3], 0 offen offset:30
4765; CHECK-NEXT:    s_setpc_b64 s[30:31]
4766entry:
4767  tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
4768  ret void
4769}
4770
; 32-byte memmove within addrspace(5); both pointers are 2-byte aligned.
; Expected assembly: an s_clause 0x7 groups eight buffer_load_dword loads
; (source offsets 24, 28, 16, 20, 8, 12, 0, 4); all loads are issued before
; the first store. The eight buffer_store_dword instructions follow in the
; same order, each preceded by its own s_waitcnt vmcnt(N), with N counting
; down from 7 to 0.
4771define void @memmove_p5_p5_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
4772; CHECK-LABEL: memmove_p5_p5_sz32_align_2_2:
4773; CHECK:       ; %bb.0: ; %entry
4774; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4775; CHECK-NEXT:    s_clause 0x7
4776; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:24
4777; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:28
4778; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
4779; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
4780; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
4781; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
4782; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen
4783; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
4784; CHECK-NEXT:    s_waitcnt vmcnt(7)
4785; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:24
4786; CHECK-NEXT:    s_waitcnt vmcnt(6)
4787; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:28
4788; CHECK-NEXT:    s_waitcnt vmcnt(5)
4789; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
4790; CHECK-NEXT:    s_waitcnt vmcnt(4)
4791; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
4792; CHECK-NEXT:    s_waitcnt vmcnt(3)
4793; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
4794; CHECK-NEXT:    s_waitcnt vmcnt(2)
4795; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
4796; CHECK-NEXT:    s_waitcnt vmcnt(1)
4797; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen
4798; CHECK-NEXT:    s_waitcnt vmcnt(0)
4799; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
4800; CHECK-NEXT:    s_setpc_b64 s[30:31]
4801entry:
4802  tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
4803  ret void
4804}
4805
; memmove of 16 bytes from private (addrspace 5) to private memory with both
; pointers declared 8-byte aligned.
; Expected gfx1030 output: one s_clause followed by four buffer_load_dword at
; offsets 0, 4, 8 and 12, all issued before the first store, then four
; buffer_store_dword to the same offsets, each preceded by an s_waitcnt vmcnt
; counting down from 3 to 0.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
4806define void @memmove_p5_p5_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
4807; CHECK-LABEL: memmove_p5_p5_sz16_align_8_8:
4808; CHECK:       ; %bb.0: ; %entry
4809; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4810; CHECK-NEXT:    s_clause 0x3
4811; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
4812; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
4813; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
4814; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4815; CHECK-NEXT:    s_waitcnt vmcnt(3)
4816; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
4817; CHECK-NEXT:    s_waitcnt vmcnt(2)
4818; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4819; CHECK-NEXT:    s_waitcnt vmcnt(1)
4820; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4821; CHECK-NEXT:    s_waitcnt vmcnt(0)
4822; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4823; CHECK-NEXT:    s_setpc_b64 s[30:31]
4824entry:
4825  tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
4826  ret void
4827}
4828
; memmove of 31 bytes from private (addrspace 5) to private memory with both
; pointers declared 8-byte aligned.
; Expected gfx1030 output: one s_clause followed by nine loads, all issued
; before the first store: seven buffer_load_dword (offsets 0..24), one
; buffer_load_ushort at offset 28 and one buffer_load_ubyte at offset 30
; (28 + 2 + 1 = 31 bytes). The matching dword/short/byte stores follow, each
; preceded by an s_waitcnt vmcnt counting down from 8 to 0.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
4829define void @memmove_p5_p5_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
4830; CHECK-LABEL: memmove_p5_p5_sz31_align_8_8:
4831; CHECK:       ; %bb.0: ; %entry
4832; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4833; CHECK-NEXT:    s_clause 0x8
4834; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:20
4835; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:16
4836; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:24
4837; CHECK-NEXT:    buffer_load_ubyte v5, v1, s[0:3], 0 offen offset:30
4838; CHECK-NEXT:    buffer_load_ushort v6, v1, s[0:3], 0 offen offset:28
4839; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen
4840; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:4
4841; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:8
4842; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4843; CHECK-NEXT:    s_waitcnt vmcnt(8)
4844; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:20
4845; CHECK-NEXT:    s_waitcnt vmcnt(7)
4846; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
4847; CHECK-NEXT:    s_waitcnt vmcnt(6)
4848; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:24
4849; CHECK-NEXT:    s_waitcnt vmcnt(5)
4850; CHECK-NEXT:    buffer_store_byte v5, v0, s[0:3], 0 offen offset:30
4851; CHECK-NEXT:    s_waitcnt vmcnt(4)
4852; CHECK-NEXT:    buffer_store_short v6, v0, s[0:3], 0 offen offset:28
4853; CHECK-NEXT:    s_waitcnt vmcnt(3)
4854; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen
4855; CHECK-NEXT:    s_waitcnt vmcnt(2)
4856; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
4857; CHECK-NEXT:    s_waitcnt vmcnt(1)
4858; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
4859; CHECK-NEXT:    s_waitcnt vmcnt(0)
4860; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4861; CHECK-NEXT:    s_setpc_b64 s[30:31]
4862entry:
4863  tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
4864  ret void
4865}
4866
; memmove of 32 bytes from private (addrspace 5) to private memory with both
; pointers declared 8-byte aligned.
; Expected gfx1030 output: one s_clause followed by eight buffer_load_dword
; (offsets 16..28 first, then 0..12), all issued before the first store, then
; eight buffer_store_dword in the same order, each preceded by an
; s_waitcnt vmcnt counting down from 7 to 0.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
4867define void @memmove_p5_p5_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
4868; CHECK-LABEL: memmove_p5_p5_sz32_align_8_8:
4869; CHECK:       ; %bb.0: ; %entry
4870; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4871; CHECK-NEXT:    s_clause 0x7
4872; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:16
4873; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:20
4874; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:24
4875; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:28
4876; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen
4877; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
4878; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
4879; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4880; CHECK-NEXT:    s_waitcnt vmcnt(7)
4881; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:16
4882; CHECK-NEXT:    s_waitcnt vmcnt(6)
4883; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:20
4884; CHECK-NEXT:    s_waitcnt vmcnt(5)
4885; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:24
4886; CHECK-NEXT:    s_waitcnt vmcnt(4)
4887; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:28
4888; CHECK-NEXT:    s_waitcnt vmcnt(3)
4889; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen
4890; CHECK-NEXT:    s_waitcnt vmcnt(2)
4891; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
4892; CHECK-NEXT:    s_waitcnt vmcnt(1)
4893; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
4894; CHECK-NEXT:    s_waitcnt vmcnt(0)
4895; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4896; CHECK-NEXT:    s_setpc_b64 s[30:31]
4897entry:
4898  tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
4899  ret void
4900}
4901
; memmove of 16 bytes from private (addrspace 5) to private memory with both
; pointers declared 16-byte aligned.
; Expected gfx1030 output: one s_clause followed by four buffer_load_dword at
; offsets 0, 4, 8 and 12, all issued before the first store, then four
; buffer_store_dword to the same offsets, each preceded by an s_waitcnt vmcnt
; counting down from 3 to 0. The instruction sequence matches the one expected
; for the 8-byte-aligned 16-byte case in this file.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
4902define void @memmove_p5_p5_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
4903; CHECK-LABEL: memmove_p5_p5_sz16_align_16_16:
4904; CHECK:       ; %bb.0: ; %entry
4905; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4906; CHECK-NEXT:    s_clause 0x3
4907; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
4908; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
4909; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
4910; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4911; CHECK-NEXT:    s_waitcnt vmcnt(3)
4912; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
4913; CHECK-NEXT:    s_waitcnt vmcnt(2)
4914; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
4915; CHECK-NEXT:    s_waitcnt vmcnt(1)
4916; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
4917; CHECK-NEXT:    s_waitcnt vmcnt(0)
4918; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4919; CHECK-NEXT:    s_setpc_b64 s[30:31]
4920entry:
4921  tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
4922  ret void
4923}
4924
; memmove of 31 bytes from private (addrspace 5) to private memory with both
; pointers declared 16-byte aligned.
; Expected gfx1030 output: one s_clause followed by nine loads, all issued
; before the first store: seven buffer_load_dword (offsets 0..24), one
; buffer_load_ushort at offset 28 and one buffer_load_ubyte at offset 30
; (28 + 2 + 1 = 31 bytes). The matching dword/short/byte stores follow, each
; preceded by an s_waitcnt vmcnt counting down from 8 to 0.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
4925define void @memmove_p5_p5_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
4926; CHECK-LABEL: memmove_p5_p5_sz31_align_16_16:
4927; CHECK:       ; %bb.0: ; %entry
4928; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4929; CHECK-NEXT:    s_clause 0x8
4930; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:24
4931; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:20
4932; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
4933; CHECK-NEXT:    buffer_load_ubyte v5, v1, s[0:3], 0 offen offset:30
4934; CHECK-NEXT:    buffer_load_ushort v6, v1, s[0:3], 0 offen offset:28
4935; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen
4936; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:4
4937; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:8
4938; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4939; CHECK-NEXT:    s_waitcnt vmcnt(8)
4940; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:24
4941; CHECK-NEXT:    s_waitcnt vmcnt(7)
4942; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:20
4943; CHECK-NEXT:    s_waitcnt vmcnt(6)
4944; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
4945; CHECK-NEXT:    s_waitcnt vmcnt(5)
4946; CHECK-NEXT:    buffer_store_byte v5, v0, s[0:3], 0 offen offset:30
4947; CHECK-NEXT:    s_waitcnt vmcnt(4)
4948; CHECK-NEXT:    buffer_store_short v6, v0, s[0:3], 0 offen offset:28
4949; CHECK-NEXT:    s_waitcnt vmcnt(3)
4950; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen
4951; CHECK-NEXT:    s_waitcnt vmcnt(2)
4952; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
4953; CHECK-NEXT:    s_waitcnt vmcnt(1)
4954; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
4955; CHECK-NEXT:    s_waitcnt vmcnt(0)
4956; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4957; CHECK-NEXT:    s_setpc_b64 s[30:31]
4958entry:
4959  tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
4960  ret void
4961}
4962
; memmove of 32 bytes from private (addrspace 5) to private memory with both
; pointers declared 16-byte aligned.
; Expected gfx1030 output: one s_clause followed by eight buffer_load_dword
; (offsets 16..28 first, then 0..12), all issued before the first store, then
; eight buffer_store_dword in the same order, each preceded by an
; s_waitcnt vmcnt counting down from 7 to 0. The instruction sequence matches
; the one expected for the 8-byte-aligned 32-byte case in this file.
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
4963define void @memmove_p5_p5_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
4964; CHECK-LABEL: memmove_p5_p5_sz32_align_16_16:
4965; CHECK:       ; %bb.0: ; %entry
4966; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4967; CHECK-NEXT:    s_clause 0x7
4968; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:16
4969; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:20
4970; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:24
4971; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:28
4972; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen
4973; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
4974; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
4975; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
4976; CHECK-NEXT:    s_waitcnt vmcnt(7)
4977; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:16
4978; CHECK-NEXT:    s_waitcnt vmcnt(6)
4979; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:20
4980; CHECK-NEXT:    s_waitcnt vmcnt(5)
4981; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:24
4982; CHECK-NEXT:    s_waitcnt vmcnt(4)
4983; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:28
4984; CHECK-NEXT:    s_waitcnt vmcnt(3)
4985; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen
4986; CHECK-NEXT:    s_waitcnt vmcnt(2)
4987; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
4988; CHECK-NEXT:    s_waitcnt vmcnt(1)
4989; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
4990; CHECK-NEXT:    s_waitcnt vmcnt(0)
4991; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
4992; CHECK-NEXT:    s_setpc_b64 s[30:31]
4993entry:
4994  tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
4995  ret void
4996}
4997
; Declarations of the llvm.memmove intrinsic for every tested pairing of
; destination address space (0, 1, 3, 5) and source address space
; (0, 1, 3, 4, 5), all with an i64 length operand.
; Each declaration uses attribute group #0, defined directly below. They
; previously named #2, a group with no visible definition in this file, which
; left #0 unreferenced.
4998declare void @llvm.memmove.p0.p0.i64(ptr addrspace(0) nocapture writeonly, ptr addrspace(0) nocapture readonly, i64, i1 immarg) #0
4999declare void @llvm.memmove.p0.p1.i64(ptr addrspace(0) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #0
5000declare void @llvm.memmove.p0.p3.i64(ptr addrspace(0) nocapture writeonly, ptr addrspace(3) nocapture readonly, i64, i1 immarg) #0
5001declare void @llvm.memmove.p0.p4.i64(ptr addrspace(0) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1 immarg) #0
5002declare void @llvm.memmove.p0.p5.i64(ptr addrspace(0) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #0
5003declare void @llvm.memmove.p1.p0.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(0) nocapture readonly, i64, i1 immarg) #0
5004declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #0
5005declare void @llvm.memmove.p1.p3.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(3) nocapture readonly, i64, i1 immarg) #0
5006declare void @llvm.memmove.p1.p4.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1 immarg) #0
5007declare void @llvm.memmove.p1.p5.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #0
5008declare void @llvm.memmove.p3.p0.i64(ptr addrspace(3) nocapture writeonly, ptr addrspace(0) nocapture readonly, i64, i1 immarg) #0
5009declare void @llvm.memmove.p3.p1.i64(ptr addrspace(3) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #0
5010declare void @llvm.memmove.p3.p3.i64(ptr addrspace(3) nocapture writeonly, ptr addrspace(3) nocapture readonly, i64, i1 immarg) #0
5011declare void @llvm.memmove.p3.p4.i64(ptr addrspace(3) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1 immarg) #0
5012declare void @llvm.memmove.p3.p5.i64(ptr addrspace(3) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #0
5013declare void @llvm.memmove.p5.p0.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(0) nocapture readonly, i64, i1 immarg) #0
5014declare void @llvm.memmove.p5.p1.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #0
5015declare void @llvm.memmove.p5.p3.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(3) nocapture readonly, i64, i1 immarg) #0
5016declare void @llvm.memmove.p5.p4.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1 immarg) #0
5017declare void @llvm.memmove.p5.p5.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #0
5018
; Attribute group shared by all llvm.memmove declarations above.
5019attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
5020
5021