; xref: /llvm-project/llvm/test/CodeGen/RISCV/memcpy-inline.ll (revision 9067070d91e9d8cdd8509ffa56a076f08a3d7281)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
; RUN: llc < %s -mtriple=riscv64 \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST

; ----------------------------------------------------------------------
; Fully unaligned cases

; Zero-length memcpy.inline: folds away to nothing on all configurations.
define void @unaligned_memcpy0(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: unaligned_memcpy0:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: unaligned_memcpy0:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 false)
  ret void
}

; 1-byte copy: a single lbu/sb pair regardless of unaligned-scalar-mem.
define void @unaligned_memcpy1(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: unaligned_memcpy1:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a1, 0(a1)
; RV32-BOTH-NEXT:    sb a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: unaligned_memcpy1:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a1, 0(a1)
; RV64-BOTH-NEXT:    sb a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 false)
  ret void
}

; 2-byte copy: byte-wise without fast unaligned access, single lh/sh with it.
define void @unaligned_memcpy2(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy2:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy2:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy2:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lh a1, 0(a1)
; RV32-FAST-NEXT:    sh a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy2:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lh a1, 0(a1)
; RV64-FAST-NEXT:    sh a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 2, i1 false)
  ret void
}

; 3-byte copy: byte-wise on slow targets; halfword + trailing byte on fast ones.
define void @unaligned_memcpy3(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy3:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy3:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy3:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lbu a2, 2(a1)
; RV32-FAST-NEXT:    sb a2, 2(a0)
; RV32-FAST-NEXT:    lh a1, 0(a1)
; RV32-FAST-NEXT:    sh a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy3:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lbu a2, 2(a1)
; RV64-FAST-NEXT:    sb a2, 2(a0)
; RV64-FAST-NEXT:    lh a1, 0(a1)
; RV64-FAST-NEXT:    sh a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
  ret void
}

; 4-byte copy: byte-wise on slow targets; a single lw/sw when unaligned access is fast.
define void @unaligned_memcpy4(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy4:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy4:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy4:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 4, i1 false)
  ret void
}

; 7-byte copy: byte-wise on slow targets; two overlapping word copies (offsets 3 and 0) on fast ones.
define void @unaligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy7:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lbu a2, 5(a1)
; RV32-NEXT:    sb a2, 5(a0)
; RV32-NEXT:    lbu a2, 4(a1)
; RV32-NEXT:    sb a2, 4(a0)
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy7:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lbu a2, 5(a1)
; RV64-NEXT:    sb a2, 5(a0)
; RV64-NEXT:    lbu a2, 4(a1)
; RV64-NEXT:    sb a2, 4(a0)
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy7:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 3(a1)
; RV32-FAST-NEXT:    sw a2, 3(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy7:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 3(a1)
; RV64-FAST-NEXT:    sw a2, 3(a0)
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 7, i1 false)
  ret void
}

; 8-byte copy: byte-wise on slow targets; two words on RV32-FAST, one ld/sd on RV64-FAST.
define void @unaligned_memcpy8(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy8:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 7(a1)
; RV32-NEXT:    sb a2, 7(a0)
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lbu a2, 5(a1)
; RV32-NEXT:    sb a2, 5(a0)
; RV32-NEXT:    lbu a2, 4(a1)
; RV32-NEXT:    sb a2, 4(a0)
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy8:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 7(a1)
; RV64-NEXT:    sb a2, 7(a0)
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lbu a2, 5(a1)
; RV64-NEXT:    sb a2, 5(a0)
; RV64-NEXT:    lbu a2, 4(a1)
; RV64-NEXT:    sb a2, 4(a0)
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy8:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy8:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 8, i1 false)
  ret void
}

; 15-byte copy: byte-wise on slow targets; overlapping words (11/8/4/0) on RV32-FAST,
; overlapping doublewords (7/0) on RV64-FAST.
define void @unaligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy15:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 14(a1)
; RV32-NEXT:    sb a2, 14(a0)
; RV32-NEXT:    lbu a2, 13(a1)
; RV32-NEXT:    sb a2, 13(a0)
; RV32-NEXT:    lbu a2, 12(a1)
; RV32-NEXT:    sb a2, 12(a0)
; RV32-NEXT:    lbu a2, 11(a1)
; RV32-NEXT:    sb a2, 11(a0)
; RV32-NEXT:    lbu a2, 10(a1)
; RV32-NEXT:    sb a2, 10(a0)
; RV32-NEXT:    lbu a2, 9(a1)
; RV32-NEXT:    sb a2, 9(a0)
; RV32-NEXT:    lbu a2, 8(a1)
; RV32-NEXT:    sb a2, 8(a0)
; RV32-NEXT:    lbu a2, 7(a1)
; RV32-NEXT:    sb a2, 7(a0)
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lbu a2, 5(a1)
; RV32-NEXT:    sb a2, 5(a0)
; RV32-NEXT:    lbu a2, 4(a1)
; RV32-NEXT:    sb a2, 4(a0)
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy15:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 14(a1)
; RV64-NEXT:    sb a2, 14(a0)
; RV64-NEXT:    lbu a2, 13(a1)
; RV64-NEXT:    sb a2, 13(a0)
; RV64-NEXT:    lbu a2, 12(a1)
; RV64-NEXT:    sb a2, 12(a0)
; RV64-NEXT:    lbu a2, 11(a1)
; RV64-NEXT:    sb a2, 11(a0)
; RV64-NEXT:    lbu a2, 10(a1)
; RV64-NEXT:    sb a2, 10(a0)
; RV64-NEXT:    lbu a2, 9(a1)
; RV64-NEXT:    sb a2, 9(a0)
; RV64-NEXT:    lbu a2, 8(a1)
; RV64-NEXT:    sb a2, 8(a0)
; RV64-NEXT:    lbu a2, 7(a1)
; RV64-NEXT:    sb a2, 7(a0)
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lbu a2, 5(a1)
; RV64-NEXT:    sb a2, 5(a0)
; RV64-NEXT:    lbu a2, 4(a1)
; RV64-NEXT:    sb a2, 4(a0)
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy15:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 11(a1)
; RV32-FAST-NEXT:    sw a2, 11(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy15:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 7(a1)
; RV64-FAST-NEXT:    sd a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 15, i1 false)
  ret void
}

; 16-byte copy: byte-wise on slow targets; four words on RV32-FAST, two doublewords on RV64-FAST.
define void @unaligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy16:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 15(a1)
; RV32-NEXT:    sb a2, 15(a0)
; RV32-NEXT:    lbu a2, 14(a1)
; RV32-NEXT:    sb a2, 14(a0)
; RV32-NEXT:    lbu a2, 13(a1)
; RV32-NEXT:    sb a2, 13(a0)
; RV32-NEXT:    lbu a2, 12(a1)
; RV32-NEXT:    sb a2, 12(a0)
; RV32-NEXT:    lbu a2, 11(a1)
; RV32-NEXT:    sb a2, 11(a0)
; RV32-NEXT:    lbu a2, 10(a1)
; RV32-NEXT:    sb a2, 10(a0)
; RV32-NEXT:    lbu a2, 9(a1)
; RV32-NEXT:    sb a2, 9(a0)
; RV32-NEXT:    lbu a2, 8(a1)
; RV32-NEXT:    sb a2, 8(a0)
; RV32-NEXT:    lbu a2, 7(a1)
; RV32-NEXT:    sb a2, 7(a0)
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lbu a2, 5(a1)
; RV32-NEXT:    sb a2, 5(a0)
; RV32-NEXT:    lbu a2, 4(a1)
; RV32-NEXT:    sb a2, 4(a0)
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy16:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 15(a1)
; RV64-NEXT:    sb a2, 15(a0)
; RV64-NEXT:    lbu a2, 14(a1)
; RV64-NEXT:    sb a2, 14(a0)
; RV64-NEXT:    lbu a2, 13(a1)
; RV64-NEXT:    sb a2, 13(a0)
; RV64-NEXT:    lbu a2, 12(a1)
; RV64-NEXT:    sb a2, 12(a0)
; RV64-NEXT:    lbu a2, 11(a1)
; RV64-NEXT:    sb a2, 11(a0)
; RV64-NEXT:    lbu a2, 10(a1)
; RV64-NEXT:    sb a2, 10(a0)
; RV64-NEXT:    lbu a2, 9(a1)
; RV64-NEXT:    sb a2, 9(a0)
; RV64-NEXT:    lbu a2, 8(a1)
; RV64-NEXT:    sb a2, 8(a0)
; RV64-NEXT:    lbu a2, 7(a1)
; RV64-NEXT:    sb a2, 7(a0)
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lbu a2, 5(a1)
; RV64-NEXT:    sb a2, 5(a0)
; RV64-NEXT:    lbu a2, 4(a1)
; RV64-NEXT:    sb a2, 4(a0)
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy16:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 12(a1)
; RV32-FAST-NEXT:    sw a2, 12(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy16:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret void
}

; 31-byte copy: fully byte-wise on slow targets; overlapping words (27..0) on RV32-FAST,
; overlapping doublewords (23/16/8/0) on RV64-FAST.
define void @unaligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy31:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 30(a1)
; RV32-NEXT:    sb a2, 30(a0)
; RV32-NEXT:    lbu a2, 29(a1)
; RV32-NEXT:    sb a2, 29(a0)
; RV32-NEXT:    lbu a2, 28(a1)
; RV32-NEXT:    sb a2, 28(a0)
; RV32-NEXT:    lbu a2, 27(a1)
; RV32-NEXT:    sb a2, 27(a0)
; RV32-NEXT:    lbu a2, 26(a1)
; RV32-NEXT:    sb a2, 26(a0)
; RV32-NEXT:    lbu a2, 25(a1)
; RV32-NEXT:    sb a2, 25(a0)
; RV32-NEXT:    lbu a2, 24(a1)
; RV32-NEXT:    sb a2, 24(a0)
; RV32-NEXT:    lbu a2, 23(a1)
; RV32-NEXT:    sb a2, 23(a0)
; RV32-NEXT:    lbu a2, 22(a1)
; RV32-NEXT:    sb a2, 22(a0)
; RV32-NEXT:    lbu a2, 21(a1)
; RV32-NEXT:    sb a2, 21(a0)
; RV32-NEXT:    lbu a2, 20(a1)
; RV32-NEXT:    sb a2, 20(a0)
; RV32-NEXT:    lbu a2, 19(a1)
; RV32-NEXT:    sb a2, 19(a0)
; RV32-NEXT:    lbu a2, 18(a1)
; RV32-NEXT:    sb a2, 18(a0)
; RV32-NEXT:    lbu a2, 17(a1)
; RV32-NEXT:    sb a2, 17(a0)
; RV32-NEXT:    lbu a2, 16(a1)
; RV32-NEXT:    sb a2, 16(a0)
; RV32-NEXT:    lbu a2, 15(a1)
; RV32-NEXT:    sb a2, 15(a0)
; RV32-NEXT:    lbu a2, 14(a1)
; RV32-NEXT:    sb a2, 14(a0)
; RV32-NEXT:    lbu a2, 13(a1)
; RV32-NEXT:    sb a2, 13(a0)
; RV32-NEXT:    lbu a2, 12(a1)
; RV32-NEXT:    sb a2, 12(a0)
; RV32-NEXT:    lbu a2, 11(a1)
; RV32-NEXT:    sb a2, 11(a0)
; RV32-NEXT:    lbu a2, 10(a1)
; RV32-NEXT:    sb a2, 10(a0)
; RV32-NEXT:    lbu a2, 9(a1)
; RV32-NEXT:    sb a2, 9(a0)
; RV32-NEXT:    lbu a2, 8(a1)
; RV32-NEXT:    sb a2, 8(a0)
; RV32-NEXT:    lbu a2, 7(a1)
; RV32-NEXT:    sb a2, 7(a0)
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lbu a2, 5(a1)
; RV32-NEXT:    sb a2, 5(a0)
; RV32-NEXT:    lbu a2, 4(a1)
; RV32-NEXT:    sb a2, 4(a0)
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy31:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 30(a1)
; RV64-NEXT:    sb a2, 30(a0)
; RV64-NEXT:    lbu a2, 29(a1)
; RV64-NEXT:    sb a2, 29(a0)
; RV64-NEXT:    lbu a2, 28(a1)
; RV64-NEXT:    sb a2, 28(a0)
; RV64-NEXT:    lbu a2, 27(a1)
; RV64-NEXT:    sb a2, 27(a0)
; RV64-NEXT:    lbu a2, 26(a1)
; RV64-NEXT:    sb a2, 26(a0)
; RV64-NEXT:    lbu a2, 25(a1)
; RV64-NEXT:    sb a2, 25(a0)
; RV64-NEXT:    lbu a2, 24(a1)
; RV64-NEXT:    sb a2, 24(a0)
; RV64-NEXT:    lbu a2, 23(a1)
; RV64-NEXT:    sb a2, 23(a0)
; RV64-NEXT:    lbu a2, 22(a1)
; RV64-NEXT:    sb a2, 22(a0)
; RV64-NEXT:    lbu a2, 21(a1)
; RV64-NEXT:    sb a2, 21(a0)
; RV64-NEXT:    lbu a2, 20(a1)
; RV64-NEXT:    sb a2, 20(a0)
; RV64-NEXT:    lbu a2, 19(a1)
; RV64-NEXT:    sb a2, 19(a0)
; RV64-NEXT:    lbu a2, 18(a1)
; RV64-NEXT:    sb a2, 18(a0)
; RV64-NEXT:    lbu a2, 17(a1)
; RV64-NEXT:    sb a2, 17(a0)
; RV64-NEXT:    lbu a2, 16(a1)
; RV64-NEXT:    sb a2, 16(a0)
; RV64-NEXT:    lbu a2, 15(a1)
; RV64-NEXT:    sb a2, 15(a0)
; RV64-NEXT:    lbu a2, 14(a1)
; RV64-NEXT:    sb a2, 14(a0)
; RV64-NEXT:    lbu a2, 13(a1)
; RV64-NEXT:    sb a2, 13(a0)
; RV64-NEXT:    lbu a2, 12(a1)
; RV64-NEXT:    sb a2, 12(a0)
; RV64-NEXT:    lbu a2, 11(a1)
; RV64-NEXT:    sb a2, 11(a0)
; RV64-NEXT:    lbu a2, 10(a1)
; RV64-NEXT:    sb a2, 10(a0)
; RV64-NEXT:    lbu a2, 9(a1)
; RV64-NEXT:    sb a2, 9(a0)
; RV64-NEXT:    lbu a2, 8(a1)
; RV64-NEXT:    sb a2, 8(a0)
; RV64-NEXT:    lbu a2, 7(a1)
; RV64-NEXT:    sb a2, 7(a0)
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lbu a2, 5(a1)
; RV64-NEXT:    sb a2, 5(a0)
; RV64-NEXT:    lbu a2, 4(a1)
; RV64-NEXT:    sb a2, 4(a0)
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy31:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 27(a1)
; RV32-FAST-NEXT:    sw a2, 27(a0)
; RV32-FAST-NEXT:    lw a2, 24(a1)
; RV32-FAST-NEXT:    sw a2, 24(a0)
; RV32-FAST-NEXT:    lw a2, 20(a1)
; RV32-FAST-NEXT:    sw a2, 20(a0)
; RV32-FAST-NEXT:    lw a2, 16(a1)
; RV32-FAST-NEXT:    sw a2, 16(a0)
; RV32-FAST-NEXT:    lw a2, 12(a1)
; RV32-FAST-NEXT:    sw a2, 12(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy31:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 23(a1)
; RV64-FAST-NEXT:    sd a2, 23(a0)
; RV64-FAST-NEXT:    ld a2, 16(a1)
; RV64-FAST-NEXT:    sd a2, 16(a0)
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 31, i1 false)
  ret void
}

; ----------------------------------------------------------------------
; Fully aligned cases

; Zero-length copy with align-8 operands: still folds away entirely.
define void @aligned_memcpy0(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy0:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy0:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 0, i1 false)
  ret void
}

; 1-byte aligned copy: lbu/sb on all configurations.
define void @aligned_memcpy1(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy1:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a1, 0(a1)
; RV32-BOTH-NEXT:    sb a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy1:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a1, 0(a1)
; RV64-BOTH-NEXT:    sb a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 1, i1 false)
  ret void
}

; 2-byte aligned copy: a single lh/sh even without unaligned-scalar-mem.
define void @aligned_memcpy2(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy2:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lh a1, 0(a1)
; RV32-BOTH-NEXT:    sh a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy2:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lh a1, 0(a1)
; RV64-BOTH-NEXT:    sh a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 2, i1 false)
  ret void
}

; 3-byte aligned copy: halfword plus one byte on all configurations.
define void @aligned_memcpy3(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy3:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a2, 2(a1)
; RV32-BOTH-NEXT:    sb a2, 2(a0)
; RV32-BOTH-NEXT:    lh a1, 0(a1)
; RV32-BOTH-NEXT:    sh a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy3:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a2, 2(a1)
; RV64-BOTH-NEXT:    sb a2, 2(a0)
; RV64-BOTH-NEXT:    lh a1, 0(a1)
; RV64-BOTH-NEXT:    sh a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 3, i1 false)
  ret void
}

; 4-byte aligned copy: a single lw/sw on all configurations.
define void @aligned_memcpy4(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy4:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy4:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lw a1, 0(a1)
; RV64-BOTH-NEXT:    sw a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 4, i1 false)
  ret void
}

; 7-byte aligned copy: word + halfword + byte on slow targets; two overlapping
; unaligned words on fast targets.
define void @aligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy7:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lh a2, 4(a1)
; RV32-NEXT:    sh a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy7:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lh a2, 4(a1)
; RV64-NEXT:    sh a2, 4(a0)
; RV64-NEXT:    lw a1, 0(a1)
; RV64-NEXT:    sw a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy7:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 3(a1)
; RV32-FAST-NEXT:    sw a2, 3(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy7:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 3(a1)
; RV64-FAST-NEXT:    sw a2, 3(a0)
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 7, i1 false)
  ret void
}

; 8-byte aligned copy: two words on RV32, one ld/sd on RV64, no FAST split needed.
define void @aligned_memcpy8(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy8:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 4(a1)
; RV32-BOTH-NEXT:    sw a2, 4(a0)
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy8:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ld a1, 0(a1)
; RV64-BOTH-NEXT:    sd a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 8, i1 false)
  ret void
}

; 15-byte aligned copy: natural-width pieces on slow targets; overlapping wide
; accesses on fast targets.
define void @aligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy15:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 14(a1)
; RV32-NEXT:    sb a2, 14(a0)
; RV32-NEXT:    lh a2, 12(a1)
; RV32-NEXT:    sh a2, 12(a0)
; RV32-NEXT:    lw a2, 8(a1)
; RV32-NEXT:    sw a2, 8(a0)
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy15:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 14(a1)
; RV64-NEXT:    sb a2, 14(a0)
; RV64-NEXT:    lh a2, 12(a1)
; RV64-NEXT:    sh a2, 12(a0)
; RV64-NEXT:    lw a2, 8(a1)
; RV64-NEXT:    sw a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy15:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 11(a1)
; RV32-FAST-NEXT:    sw a2, 11(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy15:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 7(a1)
; RV64-FAST-NEXT:    sd a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 15, i1 false)
  ret void
}

; 16-byte aligned copy: four words on RV32, two doublewords on RV64.
define void @aligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy16:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 12(a1)
; RV32-BOTH-NEXT:    sw a2, 12(a0)
; RV32-BOTH-NEXT:    lw a2, 8(a1)
; RV32-BOTH-NEXT:    sw a2, 8(a0)
; RV32-BOTH-NEXT:    lw a2, 4(a1)
; RV32-BOTH-NEXT:    sw a2, 4(a0)
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy16:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ld a2, 8(a1)
; RV64-BOTH-NEXT:    sd a2, 8(a0)
; RV64-BOTH-NEXT:    ld a1, 0(a1)
; RV64-BOTH-NEXT:    sd a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 16, i1 false)
  ret void
}

; 31-byte aligned copy: natural-width tail pieces on slow targets; overlapping
; wide accesses on fast targets.
define void @aligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy31:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 30(a1)
; RV32-NEXT:    sb a2, 30(a0)
; RV32-NEXT:    lh a2, 28(a1)
; RV32-NEXT:    sh a2, 28(a0)
; RV32-NEXT:    lw a2, 24(a1)
; RV32-NEXT:    sw a2, 24(a0)
; RV32-NEXT:    lw a2, 20(a1)
; RV32-NEXT:    sw a2, 20(a0)
; RV32-NEXT:    lw a2, 16(a1)
; RV32-NEXT:    sw a2, 16(a0)
; RV32-NEXT:    lw a2, 12(a1)
; RV32-NEXT:    sw a2, 12(a0)
; RV32-NEXT:    lw a2, 8(a1)
; RV32-NEXT:    sw a2, 8(a0)
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy31:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 30(a1)
; RV64-NEXT:    sb a2, 30(a0)
; RV64-NEXT:    lh a2, 28(a1)
; RV64-NEXT:    sh a2, 28(a0)
; RV64-NEXT:    lw a2, 24(a1)
; RV64-NEXT:    sw a2, 24(a0)
; RV64-NEXT:    ld a2, 16(a1)
; RV64-NEXT:    sd a2, 16(a0)
; RV64-NEXT:    ld a2, 8(a1)
; RV64-NEXT:    sd a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy31:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 27(a1)
; RV32-FAST-NEXT:    sw a2, 27(a0)
; RV32-FAST-NEXT:    lw a2, 24(a1)
; RV32-FAST-NEXT:    sw a2, 24(a0)
; RV32-FAST-NEXT:    lw a2, 20(a1)
; RV32-FAST-NEXT:    sw a2, 20(a0)
; RV32-FAST-NEXT:    lw a2, 16(a1)
; RV32-FAST-NEXT:    sw a2, 16(a0)
; RV32-FAST-NEXT:    lw a2, 12(a1)
; RV32-FAST-NEXT:    sw a2, 12(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy31:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 23(a1)
; RV64-FAST-NEXT:    sd a2, 23(a0)
; RV64-FAST-NEXT:    ld a2, 16(a1)
; RV64-FAST-NEXT:    sd a2, 16(a0)
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 31, i1 false)
  ret void
}

; ------------------------------------------------------------------------
; A few partially aligned cases


; 16-byte copy with only align-4 operands (i32 length variant): word copies
; everywhere except RV64-FAST, which can use unaligned doublewords.
define void @memcpy16_align4(ptr nocapture %dest, ptr nocapture %src) nounwind {
; RV32-BOTH-LABEL: memcpy16_align4:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 12(a1)
; RV32-BOTH-NEXT:    sw a2, 12(a0)
; RV32-BOTH-NEXT:    lw a2, 8(a1)
; RV32-BOTH-NEXT:    sw a2, 8(a0)
; RV32-BOTH-NEXT:    lw a2, 4(a1)
; RV32-BOTH-NEXT:    sw a2, 4(a0)
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-LABEL: memcpy16_align4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lw a2, 12(a1)
; RV64-NEXT:    sw a2, 12(a0)
; RV64-NEXT:    lw a2, 8(a1)
; RV64-NEXT:    sw a2, 8(a0)
; RV64-NEXT:    lw a2, 4(a1)
; RV64-NEXT:    sw a2, 4(a0)
; RV64-NEXT:    lw a1, 0(a1)
; RV64-NEXT:    sw a1, 0(a0)
; RV64-NEXT:    ret
;
; RV64-FAST-LABEL: memcpy16_align4:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 16, i1 false)
  ret void
}

; 11-byte copy with align-8 operands and an i32 return value; also exercises the
; non-tail, may-unwind call form (no nounwind attribute on this function).
define i32 @memcpy11_align8(ptr nocapture %dest, ptr %src) {
; RV32-LABEL: memcpy11_align8:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 10(a1)
; RV32-NEXT:    sb a2, 10(a0)
; RV32-NEXT:    lh a2, 8(a1)
; RV32-NEXT:    sh a2, 8(a0)
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: memcpy11_align8:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 10(a1)
; RV64-NEXT:    sb a2, 10(a0)
; RV64-NEXT:    lh a2, 8(a1)
; RV64-NEXT:    sh a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: memcpy11_align8:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 7(a1)
; RV32-FAST-NEXT:    sw a2, 7(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    li a0, 0
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: memcpy11_align8:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 7(a1)
; RV64-FAST-NEXT:    sw a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    li a0, 0
; RV64-FAST-NEXT:    ret
entry:
  call void @llvm.memcpy.inline.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 11, i1 false)
  ret i32 0
}

; Intrinsic declarations for both length widths used above.
declare void @llvm.memcpy.inline.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
