; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
; RUN: llc < %s -mtriple=riscv64 \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
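;
; RV32/RV64 check the default configuration; RV32-FAST/RV64-FAST check
; +unaligned-scalar-mem, which permits misaligned scalar loads and stores.
; The RV32-BOTH/RV64-BOTH prefixes match output that is identical with and
; without the feature.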

; ----------------------------------------------------------------------
; Fully unaligned cases
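;
; Neither pointer carries alignment information, so without
; +unaligned-scalar-mem the expansion is limited to byte loads and stores
; (or a libcall once the copy gets large enough), while the FAST
; configurations can use wider, possibly overlapping accesses.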

define void @unaligned_memcpy0(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: unaligned_memcpy0:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: unaligned_memcpy0:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 false)
  ret void
}

define void @unaligned_memcpy1(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: unaligned_memcpy1:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a1, 0(a1)
; RV32-BOTH-NEXT:    sb a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: unaligned_memcpy1:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a1, 0(a1)
; RV64-BOTH-NEXT:    sb a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 false)
  ret void
}

define void @unaligned_memcpy2(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy2:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy2:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy2:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lh a1, 0(a1)
; RV32-FAST-NEXT:    sh a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy2:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lh a1, 0(a1)
; RV64-FAST-NEXT:    sh a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 2, i1 false)
  ret void
}

define void @unaligned_memcpy3(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy3:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy3:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy3:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lbu a2, 2(a1)
; RV32-FAST-NEXT:    sb a2, 2(a0)
; RV32-FAST-NEXT:    lh a1, 0(a1)
; RV32-FAST-NEXT:    sh a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy3:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lbu a2, 2(a1)
; RV64-FAST-NEXT:    sb a2, 2(a0)
; RV64-FAST-NEXT:    lh a1, 0(a1)
; RV64-FAST-NEXT:    sh a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
  ret void
}

define void @unaligned_memcpy4(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy4:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy4:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy4:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 4, i1 false)
  ret void
}

define void @unaligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy7:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lbu a2, 5(a1)
; RV32-NEXT:    sb a2, 5(a0)
; RV32-NEXT:    lbu a2, 4(a1)
; RV32-NEXT:    sb a2, 4(a0)
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy7:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lbu a2, 5(a1)
; RV64-NEXT:    sb a2, 5(a0)
; RV64-NEXT:    lbu a2, 4(a1)
; RV64-NEXT:    sb a2, 4(a0)
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy7:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 3(a1)
; RV32-FAST-NEXT:    sw a2, 3(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy7:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 3(a1)
; RV64-FAST-NEXT:    sw a2, 3(a0)
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 7, i1 false)
  ret void
}

define void @unaligned_memcpy8(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy8:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 7(a1)
; RV32-NEXT:    sb a2, 7(a0)
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lbu a2, 5(a1)
; RV32-NEXT:    sb a2, 5(a0)
; RV32-NEXT:    lbu a2, 4(a1)
; RV32-NEXT:    sb a2, 4(a0)
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy8:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 7(a1)
; RV64-NEXT:    sb a2, 7(a0)
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lbu a2, 5(a1)
; RV64-NEXT:    sb a2, 5(a0)
; RV64-NEXT:    lbu a2, 4(a1)
; RV64-NEXT:    sb a2, 4(a0)
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy8:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy8:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 8, i1 false)
  ret void
}

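; For the remaining sizes in this section (15, 16, and 31 bytes), the
; configurations without +unaligned-scalar-mem call memcpy instead of
; expanding inline; the FAST configurations still expand, using overlapping
; word (RV32) or doubleword (RV64) copies for sizes that are not a multiple
; of the access width.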
define void @unaligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy15:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 15
; RV32-NEXT:    tail memcpy
;
; RV64-LABEL: unaligned_memcpy15:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 15
; RV64-NEXT:    tail memcpy
;
; RV32-FAST-LABEL: unaligned_memcpy15:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 11(a1)
; RV32-FAST-NEXT:    sw a2, 11(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy15:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 7(a1)
; RV64-FAST-NEXT:    sd a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 15, i1 false)
  ret void
}

define void @unaligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy16:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    tail memcpy
;
; RV64-LABEL: unaligned_memcpy16:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    tail memcpy
;
; RV32-FAST-LABEL: unaligned_memcpy16:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 12(a1)
; RV32-FAST-NEXT:    sw a2, 12(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy16:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret void
}

define void @unaligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy31:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 31
; RV32-NEXT:    tail memcpy
;
; RV64-LABEL: unaligned_memcpy31:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 31
; RV64-NEXT:    tail memcpy
;
; RV32-FAST-LABEL: unaligned_memcpy31:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 27(a1)
; RV32-FAST-NEXT:    sw a2, 27(a0)
; RV32-FAST-NEXT:    lw a2, 24(a1)
; RV32-FAST-NEXT:    sw a2, 24(a0)
; RV32-FAST-NEXT:    lw a2, 20(a1)
; RV32-FAST-NEXT:    sw a2, 20(a0)
; RV32-FAST-NEXT:    lw a2, 16(a1)
; RV32-FAST-NEXT:    sw a2, 16(a0)
; RV32-FAST-NEXT:    lw a2, 12(a1)
; RV32-FAST-NEXT:    sw a2, 12(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy31:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 23(a1)
; RV64-FAST-NEXT:    sd a2, 23(a0)
; RV64-FAST-NEXT:    ld a2, 16(a1)
; RV64-FAST-NEXT:    sd a2, 16(a0)
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 31, i1 false)
  ret void
}

; ----------------------------------------------------------------------
; Fully aligned cases
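;
; Both pointers are marked align 8, so even the default configurations can
; expand with naturally aligned halfword/word/doubleword accesses; the FAST
; configurations additionally use overlapping misaligned accesses for odd
; sizes.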

define void @aligned_memcpy0(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy0:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy0:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 0, i1 false)
  ret void
}

define void @aligned_memcpy1(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy1:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a1, 0(a1)
; RV32-BOTH-NEXT:    sb a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy1:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a1, 0(a1)
; RV64-BOTH-NEXT:    sb a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 1, i1 false)
  ret void
}

define void @aligned_memcpy2(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy2:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lh a1, 0(a1)
; RV32-BOTH-NEXT:    sh a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy2:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lh a1, 0(a1)
; RV64-BOTH-NEXT:    sh a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 2, i1 false)
  ret void
}

define void @aligned_memcpy3(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy3:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a2, 2(a1)
; RV32-BOTH-NEXT:    sb a2, 2(a0)
; RV32-BOTH-NEXT:    lh a1, 0(a1)
; RV32-BOTH-NEXT:    sh a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy3:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a2, 2(a1)
; RV64-BOTH-NEXT:    sb a2, 2(a0)
; RV64-BOTH-NEXT:    lh a1, 0(a1)
; RV64-BOTH-NEXT:    sh a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 3, i1 false)
  ret void
}

define void @aligned_memcpy4(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy4:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy4:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lw a1, 0(a1)
; RV64-BOTH-NEXT:    sw a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 4, i1 false)
  ret void
}

define void @aligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy7:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lh a2, 4(a1)
; RV32-NEXT:    sh a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy7:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lh a2, 4(a1)
; RV64-NEXT:    sh a2, 4(a0)
; RV64-NEXT:    lw a1, 0(a1)
; RV64-NEXT:    sw a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy7:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 3(a1)
; RV32-FAST-NEXT:    sw a2, 3(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy7:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 3(a1)
; RV64-FAST-NEXT:    sw a2, 3(a0)
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 7, i1 false)
  ret void
}

define void @aligned_memcpy8(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy8:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 4(a1)
; RV32-BOTH-NEXT:    sw a2, 4(a0)
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy8:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ld a1, 0(a1)
; RV64-BOTH-NEXT:    sd a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 8, i1 false)
  ret void
}

define void @aligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy15:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 14(a1)
; RV32-NEXT:    sb a2, 14(a0)
; RV32-NEXT:    lh a2, 12(a1)
; RV32-NEXT:    sh a2, 12(a0)
; RV32-NEXT:    lw a2, 8(a1)
; RV32-NEXT:    sw a2, 8(a0)
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy15:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 14(a1)
; RV64-NEXT:    sb a2, 14(a0)
; RV64-NEXT:    lh a2, 12(a1)
; RV64-NEXT:    sh a2, 12(a0)
; RV64-NEXT:    lw a2, 8(a1)
; RV64-NEXT:    sw a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy15:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 11(a1)
; RV32-FAST-NEXT:    sw a2, 11(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy15:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 7(a1)
; RV64-FAST-NEXT:    sd a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 15, i1 false)
  ret void
}

define void @aligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy16:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 12(a1)
; RV32-BOTH-NEXT:    sw a2, 12(a0)
; RV32-BOTH-NEXT:    lw a2, 8(a1)
; RV32-BOTH-NEXT:    sw a2, 8(a0)
; RV32-BOTH-NEXT:    lw a2, 4(a1)
; RV32-BOTH-NEXT:    sw a2, 4(a0)
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy16:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ld a2, 8(a1)
; RV64-BOTH-NEXT:    sd a2, 8(a0)
; RV64-BOTH-NEXT:    ld a1, 0(a1)
; RV64-BOTH-NEXT:    sd a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 16, i1 false)
  ret void
}

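; A 31-byte aligned copy is above RV32's inline expansion limit, so RV32
; without unaligned access still tail-calls memcpy; RV64 expands it inline
; using doubleword accesses.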
define void @aligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy31:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 31
; RV32-NEXT:    tail memcpy
;
; RV64-LABEL: aligned_memcpy31:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 30(a1)
; RV64-NEXT:    sb a2, 30(a0)
; RV64-NEXT:    lh a2, 28(a1)
; RV64-NEXT:    sh a2, 28(a0)
; RV64-NEXT:    lw a2, 24(a1)
; RV64-NEXT:    sw a2, 24(a0)
; RV64-NEXT:    ld a2, 16(a1)
; RV64-NEXT:    sd a2, 16(a0)
; RV64-NEXT:    ld a2, 8(a1)
; RV64-NEXT:    sd a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy31:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 27(a1)
; RV32-FAST-NEXT:    sw a2, 27(a0)
; RV32-FAST-NEXT:    lw a2, 24(a1)
; RV32-FAST-NEXT:    sw a2, 24(a0)
; RV32-FAST-NEXT:    lw a2, 20(a1)
; RV32-FAST-NEXT:    sw a2, 20(a0)
; RV32-FAST-NEXT:    lw a2, 16(a1)
; RV32-FAST-NEXT:    sw a2, 16(a0)
; RV32-FAST-NEXT:    lw a2, 12(a1)
; RV32-FAST-NEXT:    sw a2, 12(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy31:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 23(a1)
; RV64-FAST-NEXT:    sd a2, 23(a0)
; RV64-FAST-NEXT:    ld a2, 16(a1)
; RV64-FAST-NEXT:    sd a2, 16(a0)
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 31, i1 false)
  ret void
}

; ------------------------------------------------------------------------
; A few partially aligned cases
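;
; memcpy16_align4 copies 16 bytes with only 4-byte alignment, and
; memcpy11_align8 copies 11 bytes from 8-byte-aligned pointers; both use the
; i32 form of the intrinsic.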


define void @memcpy16_align4(ptr nocapture %dest, ptr nocapture %src) nounwind {
; RV32-BOTH-LABEL: memcpy16_align4:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 12(a1)
; RV32-BOTH-NEXT:    sw a2, 12(a0)
; RV32-BOTH-NEXT:    lw a2, 8(a1)
; RV32-BOTH-NEXT:    sw a2, 8(a0)
; RV32-BOTH-NEXT:    lw a2, 4(a1)
; RV32-BOTH-NEXT:    sw a2, 4(a0)
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-LABEL: memcpy16_align4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lw a2, 12(a1)
; RV64-NEXT:    sw a2, 12(a0)
; RV64-NEXT:    lw a2, 8(a1)
; RV64-NEXT:    sw a2, 8(a0)
; RV64-NEXT:    lw a2, 4(a1)
; RV64-NEXT:    sw a2, 4(a0)
; RV64-NEXT:    lw a1, 0(a1)
; RV64-NEXT:    sw a1, 0(a0)
; RV64-NEXT:    ret
;
; RV64-FAST-LABEL: memcpy16_align4:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 16, i1 false)
  ret void
}

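; An 11-byte copy from 8-byte-aligned pointers: the default configurations
; finish the tail with halfword and byte accesses, while the FAST
; configurations use an overlapping word at offset 7.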
define i32 @memcpy11_align8(ptr nocapture %dest, ptr %src) {
; RV32-LABEL: memcpy11_align8:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 10(a1)
; RV32-NEXT:    sb a2, 10(a0)
; RV32-NEXT:    lh a2, 8(a1)
; RV32-NEXT:    sh a2, 8(a0)
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: memcpy11_align8:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 10(a1)
; RV64-NEXT:    sb a2, 10(a0)
; RV64-NEXT:    lh a2, 8(a1)
; RV64-NEXT:    sh a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: memcpy11_align8:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 7(a1)
; RV32-FAST-NEXT:    sw a2, 7(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    li a0, 0
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: memcpy11_align8:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 7(a1)
; RV64-FAST-NEXT:    sw a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    li a0, 0
; RV64-FAST-NEXT:    ret
entry:
  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 11, i1 false)
  ret i32 0
}

declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind